From 3caa686662f7d937cf7eb852dde437cd66e79a6e Mon Sep 17 00:00:00 2001
From: Tomas Kulhanek <tomas.kulhanek@stfc.ac.uk>
Date: Thu, 21 Feb 2019 02:10:14 -0500
Subject: restructured sources

---
 CMake/FindAnacondaEnvironment.cmake                | 154 ----
 CMakeLists.txt                                     |   4 +-
 Core/CCPiDefines.h                                 |  35 -
 Core/CMakeLists.txt                                | 151 ----
 Core/inpainters_CPU/Diffusion_Inpaint_core.c       | 322 --------
 Core/inpainters_CPU/Diffusion_Inpaint_core.h       |  61 --
 .../inpainters_CPU/NonlocalMarching_Inpaint_core.c | 188 -----
 .../inpainters_CPU/NonlocalMarching_Inpaint_core.h |  54 --
 Core/regularisers_CPU/Diffus4th_order_core.c       | 250 -------
 Core/regularisers_CPU/Diffus4th_order_core.h       |  55 --
 Core/regularisers_CPU/Diffusion_core.c             | 307 --------
 Core/regularisers_CPU/Diffusion_core.h             |  59 --
 Core/regularisers_CPU/FGP_TV_core.c                | 321 --------
 Core/regularisers_CPU/FGP_TV_core.h                |  63 --
 Core/regularisers_CPU/FGP_dTV_core.c               | 441 -----------
 Core/regularisers_CPU/FGP_dTV_core.h               |  72 --
 Core/regularisers_CPU/LLT_ROF_core.c               | 410 -----------
 Core/regularisers_CPU/LLT_ROF_core.h               |  65 --
 Core/regularisers_CPU/Nonlocal_TV_core.c           | 173 -----
 Core/regularisers_CPU/Nonlocal_TV_core.h           |  61 --
 Core/regularisers_CPU/PatchSelect_core.c           | 345 ---------
 Core/regularisers_CPU/PatchSelect_core.h           |  63 --
 Core/regularisers_CPU/ROF_TV_core.c                | 289 --------
 Core/regularisers_CPU/ROF_TV_core.h                |  57 --
 Core/regularisers_CPU/SB_TV_core.c                 | 368 ---------
 Core/regularisers_CPU/SB_TV_core.h                 |  61 --
 Core/regularisers_CPU/TGV_core.c                   | 487 ------------
 Core/regularisers_CPU/TGV_core.h                   |  73 --
 Core/regularisers_CPU/TNV_core.c                   | 452 ------------
 Core/regularisers_CPU/TNV_core.h                   |  47 --
 Core/regularisers_CPU/utils.c                      | 117 ---
 Core/regularisers_CPU/utils.h                      |  34 -
 Core/regularisers_GPU/Diffus_4thO_GPU_core.cu      | 268 -------
 Core/regularisers_GPU/Diffus_4thO_GPU_core.h       |   8 -
 Core/regularisers_GPU/LLT_ROF_GPU_core.cu          | 473 ------------
 Core/regularisers_GPU/LLT_ROF_GPU_core.h           |   8 -
 Core/regularisers_GPU/NonlDiff_GPU_core.cu         | 345 ---------
 Core/regularisers_GPU/NonlDiff_GPU_core.h          |   8 -
 Core/regularisers_GPU/PatchSelect_GPU_core.cu      | 460 ------------
 Core/regularisers_GPU/PatchSelect_GPU_core.h       |   8 -
 Core/regularisers_GPU/TGV_GPU_core.cu              | 625 ----------------
 Core/regularisers_GPU/TGV_GPU_core.h               |   8 -
 Core/regularisers_GPU/TV_FGP_GPU_core.cu           | 564 --------------
 Core/regularisers_GPU/TV_FGP_GPU_core.h            |   9 -
 Core/regularisers_GPU/TV_ROF_GPU_core.cu           | 358 ---------
 Core/regularisers_GPU/TV_ROF_GPU_core.h            |   8 -
 Core/regularisers_GPU/TV_SB_GPU_core.cu            | 552 --------------
 Core/regularisers_GPU/TV_SB_GPU_core.h             |  10 -
 Core/regularisers_GPU/dTV_FGP_GPU_core.cu          | 741 -------------------
 Core/regularisers_GPU/dTV_FGP_GPU_core.h           |   9 -
 Core/regularisers_GPU/shared.h                     |  42 --
 Wrappers/CMakeLists.txt                            |  19 -
 Wrappers/Matlab/CMakeLists.txt                     | 147 ----
 Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m       | 178 -----
 Wrappers/Matlab/demos/demoMatlab_denoise.m         | 189 -----
 Wrappers/Matlab/demos/demoMatlab_inpaint.m         |  35 -
 Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m |  81 --
 .../Matlab/mex_compile/compileCPU_mex_WINDOWS.m    | 135 ----
 Wrappers/Matlab/mex_compile/compileGPU_mex.m       |  74 --
 .../mex_compile/installed/MEXed_files_location.txt |   0
 .../mex_compile/regularisers_CPU/Diffusion_4thO.c  |  77 --
 .../Matlab/mex_compile/regularisers_CPU/FGP_TV.c   |  97 ---
 .../Matlab/mex_compile/regularisers_CPU/FGP_dTV.c  | 114 ---
 .../Matlab/mex_compile/regularisers_CPU/LLT_ROF.c  |  82 ---
 .../Matlab/mex_compile/regularisers_CPU/NonlDiff.c |  89 ---
 .../mex_compile/regularisers_CPU/NonlDiff_Inp.c    | 103 ---
 .../regularisers_CPU/NonlocalMarching_Inpaint.c    |  84 ---
 .../mex_compile/regularisers_CPU/Nonlocal_TV.c     |  88 ---
 .../mex_compile/regularisers_CPU/PatchSelect.c     |  92 ---
 .../Matlab/mex_compile/regularisers_CPU/ROF_TV.c   |  77 --
 .../Matlab/mex_compile/regularisers_CPU/SB_TV.c    |  91 ---
 Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c |  83 ---
 Wrappers/Matlab/mex_compile/regularisers_CPU/TNV.c |  74 --
 .../mex_compile/regularisers_CPU/TV_energy.c       |  72 --
 .../regularisers_GPU/Diffusion_4thO_GPU.cpp        |  77 --
 .../mex_compile/regularisers_GPU/FGP_TV_GPU.cpp    |  97 ---
 .../mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp   | 113 ---
 .../mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp   |  83 ---
 .../mex_compile/regularisers_GPU/NonlDiff_GPU.cpp  |  92 ---
 .../mex_compile/regularisers_GPU/ROF_TV_GPU.cpp    |  74 --
 .../mex_compile/regularisers_GPU/SB_TV_GPU.cpp     |  91 ---
 .../mex_compile/regularisers_GPU/TGV_GPU.cpp       |  79 --
 Wrappers/Matlab/supp/RMSE.m                        |   7 -
 Wrappers/Matlab/supp/my_red_yellowMAP.mat          | Bin 1761 -> 0 bytes
 Wrappers/Python/CMakeLists.txt                     | 141 ----
 Wrappers/Python/ccpi/__init__.py                   |   0
 Wrappers/Python/ccpi/filters/__init__.py           |   0
 Wrappers/Python/ccpi/filters/regularisers.py       | 214 ------
 Wrappers/Python/conda-recipe/bld.bat               |  20 -
 Wrappers/Python/conda-recipe/build.sh              |  17 -
 .../Python/conda-recipe/conda_build_config.yaml    |   9 -
 Wrappers/Python/conda-recipe/meta.yaml             |  40 -
 Wrappers/Python/conda-recipe/run_test.py           | 819 ---------------------
 Wrappers/Python/demos/demo_cpu_inpainters.py       | 192 -----
 Wrappers/Python/demos/demo_cpu_regularisers.py     | 572 --------------
 Wrappers/Python/demos/demo_cpu_regularisers3D.py   | 458 ------------
 .../Python/demos/demo_cpu_vs_gpu_regularisers.py   | 790 --------------------
 Wrappers/Python/demos/demo_gpu_regularisers.py     | 518 -------------
 Wrappers/Python/demos/demo_gpu_regularisers3D.py   | 460 ------------
 Wrappers/Python/demos/qualitymetrics.py            |  18 -
 Wrappers/Python/setup-regularisers.py.in           |  75 --
 Wrappers/Python/src/cpu_regularisers.pyx           | 685 -----------------
 Wrappers/Python/src/gpu_regularisers.pyx           | 640 ----------------
 data/SinoInpaint.mat                               | Bin 3335061 -> 0 bytes
 data/lena_gray_512.tif                             | Bin 262598 -> 0 bytes
 recipes/regularisers/bld.bat                       |  21 -
 recipes/regularisers/build.sh                      |  19 -
 recipes/regularisers/meta.yaml                     |  27 -
 run.sh                                             |  19 -
 109 files changed, 2 insertions(+), 18689 deletions(-)
 delete mode 100644 CMake/FindAnacondaEnvironment.cmake
 delete mode 100644 Core/CCPiDefines.h
 delete mode 100644 Core/CMakeLists.txt
 delete mode 100644 Core/inpainters_CPU/Diffusion_Inpaint_core.c
 delete mode 100644 Core/inpainters_CPU/Diffusion_Inpaint_core.h
 delete mode 100644 Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c
 delete mode 100644 Core/inpainters_CPU/NonlocalMarching_Inpaint_core.h
 delete mode 100644 Core/regularisers_CPU/Diffus4th_order_core.c
 delete mode 100644 Core/regularisers_CPU/Diffus4th_order_core.h
 delete mode 100644 Core/regularisers_CPU/Diffusion_core.c
 delete mode 100644 Core/regularisers_CPU/Diffusion_core.h
 delete mode 100644 Core/regularisers_CPU/FGP_TV_core.c
 delete mode 100644 Core/regularisers_CPU/FGP_TV_core.h
 delete mode 100644 Core/regularisers_CPU/FGP_dTV_core.c
 delete mode 100644 Core/regularisers_CPU/FGP_dTV_core.h
 delete mode 100644 Core/regularisers_CPU/LLT_ROF_core.c
 delete mode 100644 Core/regularisers_CPU/LLT_ROF_core.h
 delete mode 100644 Core/regularisers_CPU/Nonlocal_TV_core.c
 delete mode 100644 Core/regularisers_CPU/Nonlocal_TV_core.h
 delete mode 100644 Core/regularisers_CPU/PatchSelect_core.c
 delete mode 100644 Core/regularisers_CPU/PatchSelect_core.h
 delete mode 100644 Core/regularisers_CPU/ROF_TV_core.c
 delete mode 100644 Core/regularisers_CPU/ROF_TV_core.h
 delete mode 100755 Core/regularisers_CPU/SB_TV_core.c
 delete mode 100644 Core/regularisers_CPU/SB_TV_core.h
 delete mode 100644 Core/regularisers_CPU/TGV_core.c
 delete mode 100644 Core/regularisers_CPU/TGV_core.h
 delete mode 100755 Core/regularisers_CPU/TNV_core.c
 delete mode 100644 Core/regularisers_CPU/TNV_core.h
 delete mode 100644 Core/regularisers_CPU/utils.c
 delete mode 100644 Core/regularisers_CPU/utils.h
 delete mode 100644 Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
 delete mode 100644 Core/regularisers_GPU/Diffus_4thO_GPU_core.h
 delete mode 100644 Core/regularisers_GPU/LLT_ROF_GPU_core.cu
 delete mode 100644 Core/regularisers_GPU/LLT_ROF_GPU_core.h
 delete mode 100644 Core/regularisers_GPU/NonlDiff_GPU_core.cu
 delete mode 100644 Core/regularisers_GPU/NonlDiff_GPU_core.h
 delete mode 100644 Core/regularisers_GPU/PatchSelect_GPU_core.cu
 delete mode 100644 Core/regularisers_GPU/PatchSelect_GPU_core.h
 delete mode 100644 Core/regularisers_GPU/TGV_GPU_core.cu
 delete mode 100644 Core/regularisers_GPU/TGV_GPU_core.h
 delete mode 100755 Core/regularisers_GPU/TV_FGP_GPU_core.cu
 delete mode 100755 Core/regularisers_GPU/TV_FGP_GPU_core.h
 delete mode 100755 Core/regularisers_GPU/TV_ROF_GPU_core.cu
 delete mode 100755 Core/regularisers_GPU/TV_ROF_GPU_core.h
 delete mode 100755 Core/regularisers_GPU/TV_SB_GPU_core.cu
 delete mode 100755 Core/regularisers_GPU/TV_SB_GPU_core.h
 delete mode 100644 Core/regularisers_GPU/dTV_FGP_GPU_core.cu
 delete mode 100644 Core/regularisers_GPU/dTV_FGP_GPU_core.h
 delete mode 100644 Core/regularisers_GPU/shared.h
 delete mode 100644 Wrappers/CMakeLists.txt
 delete mode 100755 Wrappers/Matlab/CMakeLists.txt
 delete mode 100644 Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m
 delete mode 100644 Wrappers/Matlab/demos/demoMatlab_denoise.m
 delete mode 100644 Wrappers/Matlab/demos/demoMatlab_inpaint.m
 delete mode 100644 Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m
 delete mode 100644 Wrappers/Matlab/mex_compile/compileCPU_mex_WINDOWS.m
 delete mode 100644 Wrappers/Matlab/mex_compile/compileGPU_mex.m
 delete mode 100644 Wrappers/Matlab/mex_compile/installed/MEXed_files_location.txt
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/Diffusion_4thO.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/LLT_ROF.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/NonlDiff.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/NonlDiff_Inp.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/NonlocalMarching_Inpaint.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/ROF_TV.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/SB_TV.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/TNV.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/TV_energy.c
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_GPU/Diffusion_4thO_GPU.cpp
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_TV_GPU.cpp
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_GPU/NonlDiff_GPU.cpp
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_GPU/ROF_TV_GPU.cpp
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp
 delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp
 delete mode 100644 Wrappers/Matlab/supp/RMSE.m
 delete mode 100644 Wrappers/Matlab/supp/my_red_yellowMAP.mat
 delete mode 100644 Wrappers/Python/CMakeLists.txt
 delete mode 100644 Wrappers/Python/ccpi/__init__.py
 delete mode 100644 Wrappers/Python/ccpi/filters/__init__.py
 delete mode 100644 Wrappers/Python/ccpi/filters/regularisers.py
 delete mode 100644 Wrappers/Python/conda-recipe/bld.bat
 delete mode 100644 Wrappers/Python/conda-recipe/build.sh
 delete mode 100644 Wrappers/Python/conda-recipe/conda_build_config.yaml
 delete mode 100644 Wrappers/Python/conda-recipe/meta.yaml
 delete mode 100755 Wrappers/Python/conda-recipe/run_test.py
 delete mode 100644 Wrappers/Python/demos/demo_cpu_inpainters.py
 delete mode 100644 Wrappers/Python/demos/demo_cpu_regularisers.py
 delete mode 100644 Wrappers/Python/demos/demo_cpu_regularisers3D.py
 delete mode 100644 Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
 delete mode 100644 Wrappers/Python/demos/demo_gpu_regularisers.py
 delete mode 100644 Wrappers/Python/demos/demo_gpu_regularisers3D.py
 delete mode 100644 Wrappers/Python/demos/qualitymetrics.py
 delete mode 100644 Wrappers/Python/setup-regularisers.py.in
 delete mode 100644 Wrappers/Python/src/cpu_regularisers.pyx
 delete mode 100644 Wrappers/Python/src/gpu_regularisers.pyx
 delete mode 100644 data/SinoInpaint.mat
 delete mode 100644 data/lena_gray_512.tif
 delete mode 100644 recipes/regularisers/bld.bat
 delete mode 100644 recipes/regularisers/build.sh
 delete mode 100644 recipes/regularisers/meta.yaml
 delete mode 100644 run.sh

diff --git a/CMake/FindAnacondaEnvironment.cmake b/CMake/FindAnacondaEnvironment.cmake
deleted file mode 100644
index 6475128..0000000
--- a/CMake/FindAnacondaEnvironment.cmake
+++ /dev/null
@@ -1,154 +0,0 @@
-#   Copyright 2017 Edoardo Pasca
-#
-#   Licensed under the Apache License, Version 2.0 (the "License");
-#   you may not use this file except in compliance with the License.
-#   You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#   Unless required by applicable law or agreed to in writing, software
-#   distributed under the License is distributed on an "AS IS" BASIS,
-#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#   See the License for the specific language governing permissions and
-#   limitations under the License.
-
-# #.rst:
-# FindAnacondaEnvironment
-# --------------
-#
-# Find Python executable and library for a specific Anaconda environment
-#
-# This module finds the Python interpreter for a specific Anaconda enviroment, 
-# if installed and determines where the include files and libraries are.  
-# This code sets the following variables:
-#
-# ::
-#   PYTHONINTERP_FOUND         - if the Python interpret has been found
-#   PYTHON_EXECUTABLE          - the Python interpret found
-#   PYTHON_LIBRARY             - path to the python library
-#   PYTHON_INCLUDE_PATH        - path to where Python.h is found (deprecated)
-#   PYTHON_INCLUDE_DIRS        - path to where Python.h is found
-#   PYTHONLIBS_VERSION_STRING  - version of the Python libs found (since CMake 2.8.8)
-#   PYTHON_VERSION_MAJOR       - major Python version
-#   PYTHON_VERSION_MINOR       - minor Python version
-#   PYTHON_VERSION_PATCH       - patch Python version
-
-
-
-function (findPythonForAnacondaEnvironment env)
-	if (WIN32)
-	  file(TO_CMAKE_PATH ${env}/python.exe PYTHON_EXECUTABLE)
-        elseif (UNIX)
-  	  file(TO_CMAKE_PATH ${env}/bin/python PYTHON_EXECUTABLE)
-	endif()
-
-	
-	message("findPythonForAnacondaEnvironment Found Python Executable" ${PYTHON_EXECUTABLE})
-	####### FROM FindPythonInterpr ########
-	# determine python version string
-	if(PYTHON_EXECUTABLE)
-		execute_process(COMMAND "${PYTHON_EXECUTABLE}" -c
-								"import sys; sys.stdout.write(';'.join([str(x) for x in sys.version_info[:3]]))"
-						OUTPUT_VARIABLE _VERSION
-						RESULT_VARIABLE _PYTHON_VERSION_RESULT
-						ERROR_QUIET)
-		if(NOT _PYTHON_VERSION_RESULT)
-			string(REPLACE ";" "." _PYTHON_VERSION_STRING "${_VERSION}")
-			list(GET _VERSION 0 _PYTHON_VERSION_MAJOR)
-			list(GET _VERSION 1 _PYTHON_VERSION_MINOR)
-			list(GET _VERSION 2 _PYTHON_VERSION_PATCH)
-			if(PYTHON_VERSION_PATCH EQUAL 0)
-				# it's called "Python 2.7", not "2.7.0"
-				string(REGEX REPLACE "\\.0$" "" _PYTHON_VERSION_STRING "${PYTHON_VERSION_STRING}")
-			endif()
-		else()
-			# sys.version predates sys.version_info, so use that
-			execute_process(COMMAND "${PYTHON_EXECUTABLE}" -c "import sys; sys.stdout.write(sys.version)"
-							OUTPUT_VARIABLE _VERSION
-							RESULT_VARIABLE _PYTHON_VERSION_RESULT
-							ERROR_QUIET)
-			if(NOT _PYTHON_VERSION_RESULT)
-				string(REGEX REPLACE " .*" "" _PYTHON_VERSION_STRING "${_VERSION}")
-				string(REGEX REPLACE "^([0-9]+)\\.[0-9]+.*" "\\1" _PYTHON_VERSION_MAJOR "${PYTHON_VERSION_STRING}")
-				string(REGEX REPLACE "^[0-9]+\\.([0-9])+.*" "\\1" _PYTHON_VERSION_MINOR "${PYTHON_VERSION_STRING}")
-				if(PYTHON_VERSION_STRING MATCHES "^[0-9]+\\.[0-9]+\\.([0-9]+)")
-					set(PYTHON_VERSION_PATCH "${CMAKE_MATCH_1}")
-				else()
-					set(PYTHON_VERSION_PATCH "0")
-				endif()
-			else()
-				# sys.version was first documented for Python 1.5, so assume
-				# this is older.
-				set(PYTHON_VERSION_STRING "1.4" PARENT_SCOPE)
-				set(PYTHON_VERSION_MAJOR "1" PARENT_SCOPE)
-				set(PYTHON_VERSION_MINOR "4" PARENT_SCOPE)
-				set(PYTHON_VERSION_PATCH "0" PARENT_SCOPE)
-			endif()
-		endif()
-		unset(_PYTHON_VERSION_RESULT)
-		unset(_VERSION)
-	endif()
-	###############################################
-	
-	set (PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE} PARENT_SCOPE)
-	set (PYTHONINTERP_FOUND "ON" PARENT_SCOPE)
-	set (PYTHON_VERSION_STRING ${_PYTHON_VERSION_STRING} PARENT_SCOPE)
-	set (PYTHON_VERSION_MAJOR ${_PYTHON_VERSION_MAJOR} PARENT_SCOPE)
-	set (PYTHON_VERSION_MINOR ${_PYTHON_VERSION_MINOR} PARENT_SCOPE)
-	set (PYTHON_VERSION_PATCH ${_PYTHON_VERSION_PATCH} PARENT_SCOPE)
-	message("My version found " ${PYTHON_VERSION_STRING})
-	## find conda executable
-	if (WIN32)
-	  set (CONDA_EXECUTABLE ${env}/Script/conda PARENT_SCOPE)
-	elseif(UNIX)
-	  set (CONDA_EXECUTABLE ${env}/bin/conda PARENT_SCOPE)
-	endif()
-endfunction()
-
-
-
-set(Python_ADDITIONAL_VERSIONS 3.5)
-
-find_package(PythonInterp)
-if (PYTHONINTERP_FOUND)
-  
-  message("Found interpret " ${PYTHON_EXECUTABLE})
-  message("Python Library " ${PYTHON_LIBRARY})
-  message("Python Include Dir " ${PYTHON_INCLUDE_DIR})
-  message("Python Include Path " ${PYTHON_INCLUDE_PATH})
-  
-  foreach(pv ${PYTHON_VERSION_STRING})
-    message("Found interpret " ${pv})
-  endforeach()
-endif()
-
-
-
-find_package(PythonLibs)
-if (PYTHONLIB_FOUND) 
-  message("Found PythonLibs PYTHON_LIBRARIES " ${PYTHON_LIBRARIES})
-  message("Found PythonLibs PYTHON_INCLUDE_PATH " ${PYTHON_INCLUDE_PATH})
-  message("Found PythonLibs PYTHON_INCLUDE_DIRS " ${PYTHON_INCLUDE_DIRS})
-  message("Found PythonLibs PYTHONLIBS_VERSION_STRING " ${PYTHONLIBS_VERSION_STRING}  )
-else()
-  message("No PythonLibs Found")  
-endif()
-
-
-
-
-function(findPythonPackagesPath)
-   execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "from distutils.sysconfig import *; print (get_python_lib())"
-                      RESULT_VARIABLE PYTHON_CVPY_PROCESS
-                      OUTPUT_VARIABLE PYTHON_STD_PACKAGES_PATH
-                      OUTPUT_STRIP_TRAILING_WHITESPACE)
-   #message("STD_PACKAGES " ${PYTHON_STD_PACKAGES_PATH})
-   if("${PYTHON_STD_PACKAGES_PATH}" MATCHES "site-packages")
-        set(_PYTHON_PACKAGES_PATH "python${PYTHON_VERSION_MAJOR_MINOR}/site-packages")
-   endif()
-
-    SET(PYTHON_PACKAGES_PATH "${PYTHON_STD_PACKAGES_PATH}" PARENT_SCOPE)
-
-endfunction()
-
-
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b95107a..5d3bbbd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,5 +55,5 @@ endif()
 message(STATUS "Python wrappers will be installed in " ${PYTHON_DEST})
 
 
-add_subdirectory(Core)
-add_subdirectory(Wrappers)
+add_subdirectory(src/Core)
+add_subdirectory(src)
diff --git a/Core/CCPiDefines.h b/Core/CCPiDefines.h
deleted file mode 100644
index d3038f9..0000000
--- a/Core/CCPiDefines.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Srikanth Nagella, Edoardo Pasca, Daniil Kazantsev
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-#ifndef CCPIDEFINES_H
-#define CCPIDEFINES_H
-
-#if defined(_WIN32) || defined(__WIN32__)
-  #if defined(CCPiCore_EXPORTS) || defined(CCPiNexusWidget_EXPORTS) || defined(ContourTreeSegmentation_EXPORTS) || defined(ContourTree_EXPORTS)// add by CMake 
-    #define  CCPI_EXPORT __declspec(dllexport)
-    #define EXPIMP_TEMPLATE
-  #else
-    #define  CCPI_EXPORT __declspec(dllimport)
-    #define EXPIMP_TEMPLATE extern
-  #endif /* CCPi_EXPORTS */
-#elif defined(linux) || defined(__linux) || defined(__APPLE__)
- #define CCPI_EXPORT
-#endif
-
-#endif
diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt
deleted file mode 100644
index b3c0dfb..0000000
--- a/Core/CMakeLists.txt
+++ /dev/null
@@ -1,151 +0,0 @@
-#   Copyright 2018 Edoardo Pasca
-#cmake_minimum_required (VERSION 3.0)
-
-project(RGL_core)
-#https://stackoverflow.com/questions/13298504/using-cmake-with-setup-py
-
-# The version number.
-
-set (CIL_VERSION $ENV{CIL_VERSION} CACHE INTERNAL "Core Imaging Library version" FORCE)
-
-# conda orchestrated build
-message("CIL_VERSION ${CIL_VERSION}")
-#include (GenerateExportHeader)
-
-
-find_package(OpenMP)
-if (OPENMP_FOUND)
-    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
-    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}")
-   set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}")
-   set (CMAKE_STATIC_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_STATIC_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}")
-   
-endif()
-
-## Build the regularisers package as a library
-message("Creating Regularisers as a shared library")
-
-message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
-message("CMAKE_C_FLAGS ${CMAKE_C_FLAGS}")
-message("CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS}")
-message("CMAKE_SHARED_LINKER_FLAGS ${CMAKE_SHARED_LINKER_FLAGS}")
-message("CMAKE_STATIC_LINKER_FLAGS ${CMAKE_STATIC_LINKER_FLAGS}")
-
-set(CMAKE_BUILD_TYPE "Release")
-
-if(WIN32)
-  set (FLAGS "/DWIN32 /EHsc /DCCPiCore_EXPORTS /openmp")
-  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")
-  set (CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")
-  set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRT.lib")
-  
-  set (EXTRA_LIBRARIES)
-		
-  message("library lib: ${LIBRARY_LIB}")
-  
-elseif(UNIX)
-   set (FLAGS "-O2 -funsigned-char -Wall  -Wl,--no-undefined  -DCCPiReconstructionIterative_EXPORTS ")  
-   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")
-   set (CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")
-  
-   set (EXTRA_LIBRARIES 
-		"gomp"
-		"m"
-		)
-   
-endif()
-message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
-
-## Build the regularisers package as a library
-message("Adding regularisers as a shared library")
-
-#set(CMAKE_C_COMPILER /apps/pgi/linux86-64/17.4/bin/pgcc)
-#set(CMAKE_C_FLAGS "-acc -Minfo -ta=tesla:cc20 -openmp")
-#set(CMAKE_C_FLAGS "-acc -Minfo -ta=multicore -openmp -fPIC")
-add_library(cilreg SHARED
-	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_TV_core.c
-	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/SB_TV_core.c
-	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TGV_core.c
-	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/Diffusion_core.c
-	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/Diffus4th_order_core.c
-	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/LLT_ROF_core.c
-        ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/ROF_TV_core.c
-        ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_dTV_core.c
-        ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TNV_core.c
-            ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/Nonlocal_TV_core.c
-            ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/PatchSelect_core.c
-	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/utils.c
-	    ${CMAKE_CURRENT_SOURCE_DIR}/inpainters_CPU/Diffusion_Inpaint_core.c
-	    ${CMAKE_CURRENT_SOURCE_DIR}/inpainters_CPU/NonlocalMarching_Inpaint_core.c
-	    )
-target_link_libraries(cilreg ${EXTRA_LIBRARIES} )
-include_directories(cilreg PUBLIC 
-                      ${LIBRARY_INC}/include 
-					  ${CMAKE_CURRENT_SOURCE_DIR}
-		              ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/
-		              ${CMAKE_CURRENT_SOURCE_DIR}/inpainters_CPU/  )
-
-## Install
-
-if (UNIX)
-message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib")
-install(TARGETS cilreg
-	LIBRARY DESTINATION lib
-	CONFIGURATIONS ${CMAKE_BUILD_TYPE} 
-	)
-elseif(WIN32)
-message ("I'd install into ${CMAKE_INSTALL_PREFIX} lib bin")
-  install(TARGETS cilreg 
-	RUNTIME DESTINATION bin
-	ARCHIVE DESTINATION lib
-	CONFIGURATIONS ${CMAKE_BUILD_TYPE} 
-	)
-endif()
-
-
-
-# GPU Regularisers
-if (BUILD_CUDA)
-    find_package(CUDA)
-    if (CUDA_FOUND)
-      set(CUDA_NVCC_FLAGS "-Xcompiler -fPIC -shared -D_FORCE_INLINES")
-      message("CUDA FLAGS ${CUDA_NVCC_FLAGS}")
-      CUDA_ADD_LIBRARY(cilregcuda SHARED
-        ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_ROF_GPU_core.cu
-        ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_FGP_GPU_core.cu
-        ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_SB_GPU_core.cu
-        ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/LLT_ROF_GPU_core.cu
-        ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TGV_GPU_core.cu
-        ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/dTV_FGP_GPU_core.cu
-        ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/NonlDiff_GPU_core.cu
-        ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/Diffus_4thO_GPU_core.cu
-        ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/PatchSelect_GPU_core.cu
-      )
-      if (UNIX)
-        message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib")
-        install(TARGETS cilregcuda
-        LIBRARY DESTINATION lib
-        CONFIGURATIONS ${CMAKE_BUILD_TYPE} 
-        )
-      elseif(WIN32)
-        message ("I'd install into ${CMAKE_INSTALL_PREFIX} lib bin")
-        install(TARGETS cilregcuda
-        RUNTIME DESTINATION bin
-        ARCHIVE DESTINATION lib
-        CONFIGURATIONS ${CMAKE_BUILD_TYPE} 
-        )
-      endif()
-    else()
-      message("CUDA NOT FOUND")
-    endif()
-endif()
-
-if (${BUILD_MATLAB_WRAPPER})
-  if (WIN32)
-        install(TARGETS cilreg DESTINATION ${MATLAB_DEST})
-        if (CUDA_FOUND)
-            install(TARGETS cilregcuda DESTINATION ${MATLAB_DEST})
-        endif()
-  endif()
-endif()
diff --git a/Core/inpainters_CPU/Diffusion_Inpaint_core.c b/Core/inpainters_CPU/Diffusion_Inpaint_core.c
deleted file mode 100644
index 08b168a..0000000
--- a/Core/inpainters_CPU/Diffusion_Inpaint_core.c
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Diffusion_Inpaint_core.h"
-#include "utils.h"
-
-/*sign function*/
-int signNDF_inc(float x) {
-    return (x > 0) - (x < 0);
-}
-
-/* C-OMP implementation of linear and nonlinear diffusion [1,2] for inpainting task (2D/3D case)
- * The minimisation is performed using explicit scheme. 
- *
- * Input Parameters:
- * 1. Image/volume to inpaint
- * 2. Mask of the same size as (1) in 'unsigned char' format  (ones mark the region to inpaint, zeros belong to the data)
- * 3. lambda - regularization parameter
- * 4. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
- * 5. Number of iterations, for explicit scheme >= 150 is recommended 
- * 6. tau - time-marching step for explicit scheme
- * 7. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
- *
- * Output:
- * [1] Inpainted image/volume 
- *
- * This function is based on the paper by
- * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
- * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
- */
-
-float Diffusion_Inpaint_CPU_main(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ)
-{
-    long i, pointsone;
-    float sigmaPar2;
-    sigmaPar2 = sigmaPar/sqrt(2.0f);
-    
-    /* copy into output */
-    copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
-    
-    pointsone = 0;
-    for (i=0; i<dimY*dimX*dimZ; i++) if (Mask[i] == 1) pointsone++;
-        
-    if (pointsone == 0) printf("%s \n", "Nothing to inpaint, zero mask!");
-    else {
-    
-    if (dimZ == 1) {
-    /* running 2D diffusion iterations */
-    for(i=0; i < iterationsNumb; i++) {
-            if (sigmaPar == 0.0f) LinearDiff_Inp_2D(Input, Mask, Output, lambdaPar, tau, (long)(dimX), (long)(dimY)); /* linear diffusion (heat equation) */
-            else NonLinearDiff_Inp_2D(Input, Mask, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY)); /* nonlinear diffusion */
-		}
-	}
-	else {
-	/* running 3D diffusion iterations */
-    for(i=0; i < iterationsNumb; i++) {
-            if (sigmaPar == 0.0f) LinearDiff_Inp_3D(Input, Mask, Output, lambdaPar, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
-            else NonLinearDiff_Inp_3D(Input, Mask, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY), (long)(dimZ));
-            }
-         }
-	}
-    return *Output;
-}
-/********************************************************************/
-/***************************2D Functions*****************************/
-/********************************************************************/
-/* linear diffusion (heat equation) */
-float LinearDiff_Inp_2D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float tau, long dimX, long dimY)
-{
-	long i,j,i1,i2,j1,j2,index;
-	float e,w,n,s,e1,w1,n1,s1;
-	
-#pragma omp parallel for shared(Input,Mask) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1)
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            index = j*dimX+i;
-             
-             if (Mask[index] > 0) {
-				/*inpainting process*/
-                e = Output[j*dimX+i1];
-                w = Output[j*dimX+i2];
-                n = Output[j1*dimX+i];
-                s = Output[j2*dimX+i];
-                
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-                
-                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
-			}
-		}}
-	return *Output;
-}
-
-/* nonlinear diffusion */
-float NonLinearDiff_Inp_2D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY)
-{
-	long i,j,i1,i2,j1,j2,index;
-	float e,w,n,s,e1,w1,n1,s1;
-	
-#pragma omp parallel for shared(Input,Mask) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1)
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            index = j*dimX+i;
-                
-        if (Mask[index] > 0) {
-		/*inpainting process*/
-                e = Output[j*dimX+i1];
-                w = Output[j*dimX+i2];
-                n = Output[j1*dimX+i];
-                s = Output[j2*dimX+i];
-                
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-
-            if (penaltytype == 1){
-            /* Huber penalty */
-            if (fabs(e1) > sigmaPar) e1 =  signNDF_inc(e1);
-            else e1 = e1/sigmaPar;
-            
-            if (fabs(w1) > sigmaPar) w1 =  signNDF_inc(w1);
-            else w1 = w1/sigmaPar;
-            
-            if (fabs(n1) > sigmaPar) n1 =  signNDF_inc(n1);
-            else n1 = n1/sigmaPar;
-            
-            if (fabs(s1) > sigmaPar) s1 =  signNDF_inc(s1);
-            else s1 = s1/sigmaPar;
-            }
-            else if (penaltytype == 2) {
-            /* Perona-Malik */
-            e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
-            w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
-            n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
-            s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
-            }
-            else if (penaltytype == 3) {
-            /* Tukey Biweight */
-            if (fabs(e1) <= sigmaPar) e1 =  e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
-            else e1 = 0.0f;
-            if (fabs(w1) <= sigmaPar) w1 =  w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
-            else w1 = 0.0f;
-            if (fabs(n1) <= sigmaPar) n1 =  n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
-            else n1 = 0.0f;
-            if (fabs(s1) <= sigmaPar) s1 =  s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
-            else s1 = 0.0f;
-            }
-            else {
-				printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
-				break;
-				}
-           Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));  
-		}
-		}}
-	return *Output;
-}
-/********************************************************************/
-/***************************3D Functions*****************************/
-/********************************************************************/
-/* linear diffusion (heat equation) */
-float LinearDiff_Inp_3D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float tau, long dimX, long dimY, long dimZ)
-{
-	long i,j,k,i1,i2,j1,j2,k1,k2,index;
-	float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
-	
-#pragma omp parallel for shared(Input,Mask) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1,k,k1,k2,u1,d1,u,d)
-for(k=0; k<dimZ; k++) {
-	k1 = k+1; if (k1 == dimZ) k1 = k-1;
-    k2 = k-1; if (k2 < 0) k2 = k+1;
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            index = (dimX*dimY)*k + j*dimX+i;
-            
-            if (Mask[index] > 0) {
-			/*inpainting process*/
-            
-                e = Output[(dimX*dimY)*k + j*dimX+i1];
-                w = Output[(dimX*dimY)*k + j*dimX+i2];
-                n = Output[(dimX*dimY)*k + j1*dimX+i];
-                s = Output[(dimX*dimY)*k + j2*dimX+i];
-                u = Output[(dimX*dimY)*k1 + j*dimX+i];
-                d = Output[(dimX*dimY)*k2 + j*dimX+i];
-                
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-                u1 = u - Output[index];
-                d1 = d - Output[index];
-                
-                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));  
-			}
-		}}}
-	return *Output;
-}
-
-float NonLinearDiff_Inp_3D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY, long dimZ)
-{
-	long i,j,k,i1,i2,j1,j2,k1,k2,index;
-	float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
-	
-#pragma omp parallel for shared(Input,Mask) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1,k,k1,k2,u1,d1,u,d)
-for(k=0; k<dimZ; k++) {
-	k1 = k+1; if (k1 == dimZ) k1 = k-1;
-    k2 = k-1; if (k2 < 0) k2 = k+1;
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            index = (dimX*dimY)*k + j*dimX+i;
-            
-        if (Mask[index] > 0) {
-			/*inpainting process*/
-                e = Output[(dimX*dimY)*k + j*dimX+i1];
-                w = Output[(dimX*dimY)*k + j*dimX+i2];
-                n = Output[(dimX*dimY)*k + j1*dimX+i];
-                s = Output[(dimX*dimY)*k + j2*dimX+i];
-                u = Output[(dimX*dimY)*k1 + j*dimX+i];
-                d = Output[(dimX*dimY)*k2 + j*dimX+i];
-                
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-                u1 = u - Output[index];
-                d1 = d - Output[index];
-                
-             if (penaltytype == 1){
-            /* Huber penalty */
-            if (fabs(e1) > sigmaPar) e1 =  signNDF_inc(e1);
-            else e1 = e1/sigmaPar;
-            
-            if (fabs(w1) > sigmaPar) w1 =  signNDF_inc(w1);
-            else w1 = w1/sigmaPar;
-            
-            if (fabs(n1) > sigmaPar) n1 =  signNDF_inc(n1);
-            else n1 = n1/sigmaPar;
-            
-            if (fabs(s1) > sigmaPar) s1 =  signNDF_inc(s1);
-            else s1 = s1/sigmaPar;
-            
-            if (fabs(u1) > sigmaPar) u1 =  signNDF_inc(u1);
-            else u1 = u1/sigmaPar;
-            
-            if (fabs(d1) > sigmaPar) d1 =  signNDF_inc(d1);
-            else d1 = d1/sigmaPar;
-            }
-            else if (penaltytype == 2) {
-            /* Perona-Malik */
-            e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
-            w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
-            n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
-            s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
-            u1 = (u1)/(1.0f + powf((u1/sigmaPar),2));
-            d1 = (d1)/(1.0f + powf((d1/sigmaPar),2));
-            }
-            else if (penaltytype == 3) {
-            /* Tukey Biweight */
-            if (fabs(e1) <= sigmaPar) e1 =  e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
-            else e1 = 0.0f;
-            if (fabs(w1) <= sigmaPar) w1 =  w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
-            else w1 = 0.0f;
-            if (fabs(n1) <= sigmaPar) n1 =  n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
-            else n1 = 0.0f;
-            if (fabs(s1) <= sigmaPar) s1 =  s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
-            else s1 = 0.0f;
-            if (fabs(u1) <= sigmaPar) u1 =  u1*powf((1.0f - powf((u1/sigmaPar),2)), 2);
-            else u1 = 0.0f;
-            if (fabs(d1) <= sigmaPar) d1 =  d1*powf((1.0f - powf((d1/sigmaPar),2)), 2);
-            else d1 = 0.0f;
-            }
-            else {
-				printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
-				break;
-				}
-
-                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));  
-		}
-		}}}
-	return *Output;
-}
diff --git a/Core/inpainters_CPU/Diffusion_Inpaint_core.h b/Core/inpainters_CPU/Diffusion_Inpaint_core.h
deleted file mode 100644
index a96fe79..0000000
--- a/Core/inpainters_CPU/Diffusion_Inpaint_core.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-#include "CCPiDefines.h"
-
-
-/* C-OMP implementation of linear and nonlinear diffusion [1,2] for inpainting task (2D/3D case)
- * The minimisation is performed using explicit scheme. 
- *
- * Input Parameters:
- * 1. Image/volume to inpaint
- * 2. Mask of the same size as (1) in 'unsigned char' format  (ones mark the region to inpaint, zeros belong to the data)
- * 3. lambda - regularization parameter
- * 4. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
- * 5. Number of iterations, for explicit scheme >= 150 is recommended 
- * 6. tau - time-marching step for explicit scheme
- * 7. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
- *
- * Output:
- * [1] Inpainted image/volume 
- *
- * This function is based on the paper by
- * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
- * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
- */
-
- 
-#ifdef __cplusplus
-extern "C" {
-#endif
-CCPI_EXPORT float Diffusion_Inpaint_CPU_main(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb,  float tau, int penaltytype, int dimX, int dimY, int dimZ);
-
-CCPI_EXPORT float LinearDiff_Inp_2D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float tau, long dimX, long dimY);
-CCPI_EXPORT float NonLinearDiff_Inp_2D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY);
-CCPI_EXPORT float LinearDiff_Inp_3D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float tau, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float NonLinearDiff_Inp_3D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY, long dimZ);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c b/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c
deleted file mode 100644
index b488ca4..0000000
--- a/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "NonlocalMarching_Inpaint_core.h"
-#include "utils.h"
-
-
-/* C-OMP implementation of Nonlocal Vertical Marching inpainting method (2D case)
- * The method is heuristic but computationally efficent (especially for larger images).
- * It developed specifically to smoothly inpaint horizontal or inclined missing data regions in sinograms
- * The method WILL not work satisfactory if you have lengthy vertical stripes of missing data
- *
- * Input:
- * 1. 2D image or sinogram with horizontal or inclined regions of missing data
- * 2. Mask of the same size as A in 'unsigned char' format  (ones mark the region to inpaint, zeros belong to the data)
- * 3. Linear increment to increase searching window size in iterations, values from 1-3 is a good choice
- *
- * Output:
- * 1. Inpainted image or a sinogram
- * 2. updated mask
- *
- * Reference: D. Kazantsev (paper in preparation)
- */
-
-float NonlocalMarching_Inpaint_main(float *Input, unsigned char *M, float *Output, unsigned char *M_upd, int SW_increment, int iterationsNumb, int trigger, int dimX, int dimY, int dimZ)
-{
-    int i, j, i_m, j_m, counter, iter, iterations_number, W_fullsize, switchmask, switchcurr, counterElements;
-    float *Gauss_weights;
-    
-    /* copying M to M_upd */
-    copyIm_unchar(M, M_upd, dimX, dimY, 1);
-    
-    /* Copying the image */
-    copyIm(Input, Output, dimX, dimY, 1);
-    
-    /* Find how many inpainting iterations (equal to the number of ones) required based on a mask  */
-    if (iterationsNumb == 0) {
-        iterations_number = 0;
-        for (i=0; i<dimY*dimX; i++) {
-            if (M[i] == 1) iterations_number++;
-        }
-        if ((int)(iterations_number/dimY) > dimX) iterations_number = dimX;
-    }
-    else iterations_number = iterationsNumb;
-    
-    if (iterations_number == 0) printf("%s \n", "Nothing to inpaint, zero mask!");
-    else {
-        
-        printf("%s %i \n", "Max iteration number equals to:", iterations_number);
-        
-        /* Inpainting iterations run here*/
-        int W_halfsize = 1;
-        for(iter=0; iter < iterations_number; iter++) {
-            
-            //if (mod (iter, 2) == 0) {W_halfsize += 1;}
-            // printf("%i \n", W_halfsize);
-            
-            /* pre-calculation of Gaussian distance weights  */
-            W_fullsize = (int)(2*W_halfsize + 1); /*full size of similarity window */
-            Gauss_weights = (float*)calloc(W_fullsize*W_fullsize,sizeof(float ));
-            counter = 0;
-            for(i_m=-W_halfsize; i_m<=W_halfsize; i_m++) {
-                for(j_m=-W_halfsize; j_m<=W_halfsize; j_m++) {
-                    Gauss_weights[counter] = exp(-(pow((i_m), 2) + pow((j_m), 2))/(2*W_halfsize*W_halfsize));
-                    counter++;
-                }
-            }
-            
-            if (trigger == 0) {
-                /*Matlab*/
-#pragma omp parallel for shared(Output, M_upd, Gauss_weights) private(i, j, switchmask, switchcurr)
-                for(j=0; j<dimY; j++) {
-                    switchmask = 0;
-                    for(i=0; i<dimX; i++) {
-                        switchcurr = 0;
-                        if ((M_upd[j*dimX + i] == 1) && (switchmask == 0)) {
-                            /* perform inpainting of the current pixel */
-                            inpaint_func(Output, M_upd, Gauss_weights, i, j, dimX, dimY, W_halfsize, W_fullsize);
-                            /* add value to the mask*/
-                            M_upd[j*dimX + i] = 0;
-                            switchmask = 1; switchcurr = 1;
-                        }
-                        if ((M_upd[j*dimX + i] == 0) && (switchmask == 1) && (switchcurr == 0)) {
-                            /* perform inpainting of the previous (i-1) pixel */
-                            inpaint_func(Output, M_upd, Gauss_weights, i-1, j, dimX, dimY, W_halfsize, W_fullsize);
-                            /* add value to the mask*/
-                            M_upd[(j)*dimX + i-1] = 0;
-                            switchmask = 0;
-                        }
-                    }
-                }
-            }
-            else {
-                /*Python*/
-                /* find a point in the mask to inpaint */
-#pragma omp parallel for shared(Output, M_upd, Gauss_weights) private(i, j, switchmask, switchcurr)
-                for(i=0; i<dimX; i++) {
-                    switchmask = 0;
-                    for(j=0; j<dimY; j++) {
-                        switchcurr = 0;
-                        if ((M_upd[j*dimX + i] == 1) && (switchmask == 0)) {
-                            /* perform inpainting of the current pixel */
-                            inpaint_func(Output, M_upd, Gauss_weights, i, j, dimX, dimY, W_halfsize, W_fullsize);
-                            /* add value to the mask*/
-                            M_upd[j*dimX + i] = 0;
-                            switchmask = 1; switchcurr = 1;
-                        }
-                        if ((M_upd[j*dimX + i] == 0) && (switchmask == 1) && (switchcurr == 0)) {
-                            /* perform inpainting of the previous (j-1) pixel */
-                            inpaint_func(Output, M_upd, Gauss_weights, i, j-1, dimX, dimY, W_halfsize, W_fullsize);
-                            /* add value to the mask*/
-                            M_upd[(j-1)*dimX + i] = 0;
-                            switchmask = 0;
-                        }
-                    }
-                }
-            }
-            free(Gauss_weights);
-            
-            /* check if possible to terminate iterations earlier */
-            counterElements = 0;
-            for(i=0; i<dimX*dimY; i++) if (M_upd[i] == 0) counterElements++;
-            
-            if (counterElements == dimX*dimY) {
-                printf("%s \n", "Padding completed!");
-                break;
-            }
-            W_halfsize += SW_increment;
-        }
-        printf("%s %i \n", "Iterations stopped at:", iter);
-    }
-    return *Output;
-}
-
-float inpaint_func(float *U, unsigned char *M_upd, float *Gauss_weights, int i, int j, int dimX, int dimY, int W_halfsize, int W_fullsize)
-{
-    int i1, j1, i_m, j_m, counter;
-    float sum_val, sumweight;
-    
-    /*method 1: inpainting based on Euclidian weights */
-    sumweight = 0.0f;
-    counter = 0; sum_val = 0.0f;
-    for(i_m=-W_halfsize; i_m<=W_halfsize; i_m++) {
-        i1 = i+i_m;
-        for(j_m=-W_halfsize; j_m<=W_halfsize; j_m++) {
-            j1 = j+j_m;
-            if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY))) {
-                if (M_upd[j1*dimX + i1] == 0) {
-                    sumweight += Gauss_weights[counter];
-                }
-            }
-            counter++;
-        }
-    }
-    counter = 0; sum_val = 0.0f;
-    for(i_m=-W_halfsize; i_m<=W_halfsize; i_m++) {
-        i1 = i+i_m;
-        for(j_m=-W_halfsize; j_m<=W_halfsize; j_m++) {
-            j1 = j+j_m;
-            if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY))) {
-                if ((M_upd[j1*dimX + i1] == 0) && (sumweight != 0.0f)) {
-                    /* we have data so add it with Euc weight */
-                    sum_val += (Gauss_weights[counter]/sumweight)*U[j1*dimX + i1];
-                }
-            }
-            counter++;
-        }
-    }
-    U[j*dimX + i] = sum_val;
-    return *U;
-}
-
diff --git a/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.h b/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.h
deleted file mode 100644
index 0f99ed4..0000000
--- a/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-#include "CCPiDefines.h"
-
-
-/* C-OMP implementation of Nonlocal Vertical Marching inpainting method (2D case)
- * The method is heuristic but computationally efficent (especially for larger images).
- * It developed specifically to smoothly inpaint horizontal or inclined missing data regions in sinograms
- * The method WILL not work satisfactory if you have lengthy vertical stripes of missing data
- *
- * Inputs:
- * 1. 2D image or sinogram with horizontal or inclined regions of missing data
- * 2. Mask of the same size as A in 'unsigned char' format  (ones mark the region to inpaint, zeros belong to the data)
- * 3. Linear increment to increase searching window size in iterations, values from 1-3 is a good choice
-
- * Output:
- * 1. Inpainted image or a sinogram
- * 2. updated mask
- *
- * Reference: TBA
- */
-
- 
-#ifdef __cplusplus
-extern "C" {
-#endif
-CCPI_EXPORT float NonlocalMarching_Inpaint_main(float *Input, unsigned char *M, float *Output, unsigned char *M_upd, int SW_increment, int iterationsNumb, int trigger, int dimX, int dimY, int dimZ);
-CCPI_EXPORT float inpaint_func(float *U, unsigned char *M_upd, float *Gauss_weights, int i, int j, int dimX, int dimY, int W_halfsize, int W_fullsize);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Core/regularisers_CPU/Diffus4th_order_core.c b/Core/regularisers_CPU/Diffus4th_order_core.c
deleted file mode 100644
index 01f4f64..0000000
--- a/Core/regularisers_CPU/Diffus4th_order_core.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Diffus4th_order_core.h"
-#include "utils.h"
-
-#define EPS 1.0e-7
-
-/* C-OMP implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
- * The minimisation is performed using explicit scheme. 
- *
- * Input Parameters:
- * 1. Noisy image/volume 
- * 2. lambda - regularization parameter
- * 3. Edge-preserving parameter (sigma)
- * 4. Number of iterations, for explicit scheme >= 150 is recommended 
- * 5. tau - time-marching step for the explicit scheme
- *
- * Output:
- * [1] Regularized image/volume 
- *
- * This function is based on the paper by
- * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
- */
-
-float Diffus4th_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ)
-{
-    int i,DimTotal;
-    float sigmaPar2;
-    float *W_Lapl=NULL;
-    sigmaPar2 = sigmaPar*sigmaPar;
-    DimTotal =  dimX*dimY*dimZ;
-    
-    W_Lapl = calloc(DimTotal, sizeof(float));
-    
-    /* copy into output */
-    copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
-    
-    if (dimZ == 1) {
-    /* running 2D diffusion iterations */
-    for(i=0; i < iterationsNumb; i++) {
-            /* Calculating weighted Laplacian */
-            Weighted_Laplc2D(W_Lapl, Output, sigmaPar2, dimX, dimY);
-            /* Perform iteration step */
-            Diffusion_update_step2D(Output, Input, W_Lapl, lambdaPar, sigmaPar2, tau, (long)(dimX), (long)(dimY));
-		}
-	}
-	else {
-	/* running 3D diffusion iterations */
-    for(i=0; i < iterationsNumb; i++) {
-		    /* Calculating weighted Laplacian */
-            Weighted_Laplc3D(W_Lapl, Output, sigmaPar2, dimX, dimY, dimZ);
-            /* Perform iteration step */
-            Diffusion_update_step3D(Output, Input, W_Lapl, lambdaPar, sigmaPar2, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
-		}
-	}
-	free(W_Lapl);
-    return *Output;
-}
-/********************************************************************/
-/***************************2D Functions*****************************/
-/********************************************************************/
-float Weighted_Laplc2D(float *W_Lapl, float *U0, float sigma, long dimX, long dimY)
-{   
-    long i,j,i1,i2,j1,j2,index;
-    float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq;
-
-        #pragma omp parallel for shared(W_Lapl) private(i,j,i1,i2,j1,j2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq)
-        for(i=0; i<dimX; i++) {
-			 /* symmetric boundary conditions */
-			i1 = i+1; if (i1 == dimX) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            for(j=0; j<dimY; j++) {
-				 /* symmetric boundary conditions */
-				j1 = j+1; if (j1 == dimY) j1 = j-1;
-				j2 = j-1; if (j2 < 0) j2 = j+1;
-				
-				index = j*dimX+i;
-				
-				gradX = 0.5f*(U0[j*dimX+i2] - U0[j*dimX+i1]);
-				gradX_sq = pow(gradX,2);
-				
-				gradY = 0.5f*(U0[j2*dimX+i] - U0[j1*dimX+i]);
-                gradY_sq = pow(gradY,2);
-                
-                gradXX = U0[j*dimX+i2] + U0[j*dimX+i1] - 2*U0[index];
-                gradYY = U0[j2*dimX+i] + U0[j1*dimX+i] - 2*U0[index];
-                
-                gradXY = 0.25f*(U0[j2*dimX+i2] + U0[j1*dimX+i1] - U0[j1*dimX+i2] - U0[j2*dimX+i1]);
-                xy_2 = 2.0f*gradX*gradY*gradXY;
-                
-                denom =  gradX_sq + gradY_sq;
-                
-                if (denom <= EPS) {
-                    V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/EPS;
-                    V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/EPS; 
-                    }
-                else  {
-                    V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/denom;
-                    V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/denom;  
-                    }
-
-                c = 1.0f/(1.0f + denom/sigma);
-                c_sq = c*c;
-                
-                W_Lapl[index] = c_sq*V_norm + c*V_orth;
-            }
-        }
-        return *W_Lapl;
-}
-
-float Diffusion_update_step2D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY)
-{
-	long i,j,i1,i2,j1,j2,index;
-    float gradXXc, gradYYc;
-
-            #pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,index,gradXXc,gradYYc)
-        for(i=0; i<dimX; i++) {
-			 /* symmetric boundary conditions */
-			i1 = i+1; if (i1 == dimX) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            for(j=0; j<dimY; j++) {
-				 /* symmetric boundary conditions */
-				j1 = j+1; if (j1 == dimY) j1 = j-1;
-				j2 = j-1; if (j2 < 0) j2 = j+1;
-					index = j*dimX+i;
-					
-                    gradXXc = W_Lapl[j*dimX+i2] + W_Lapl[j*dimX+i1] - 2*W_Lapl[index];
-                    gradYYc = W_Lapl[j2*dimX+i] + W_Lapl[j1*dimX+i] - 2*W_Lapl[index];
-
-                    Output[index] += tau*(-lambdaPar*(gradXXc + gradYYc) - (Output[index] - Input[index]));
-                }
-            }
-	return *Output;
-}
-/********************************************************************/
-/***************************3D Functions*****************************/
-/********************************************************************/
-float Weighted_Laplc3D(float *W_Lapl, float *U0, float sigma, long dimX, long dimY, long dimZ)
-{   
-    long i,j,k,i1,i2,j1,j2,k1,k2,index;
-    float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2;
-        
-        #pragma omp parallel for shared(W_Lapl) private(i,j,k,i1,i2,j1,j2,k1,k2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2)
-        for(i=0; i<dimX; i++) {
-			 /* symmetric boundary conditions */
-			i1 = i+1; if (i1 == dimX) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            for(j=0; j<dimY; j++) {
-				/* symmetric boundary conditions */
-				j1 = j+1; if (j1 == dimY) j1 = j-1;
-				j2 = j-1; if (j2 < 0) j2 = j+1;
-				
-				for(k=0; k<dimZ; k++) {
-				/* symmetric boundary conditions */
-				k1 = k+1; if (k1 == dimZ) k1 = k-1;
-				k2 = k-1; if (k2 < 0) k2 = k+1;
-				
-				index = (dimX*dimY)*k + j*dimX+i;
-				
-				gradX = 0.5f*(U0[(dimX*dimY)*k + j*dimX+i2] - U0[(dimX*dimY)*k + j*dimX+i1]);
-				gradX_sq = pow(gradX,2);
-				
-				gradY = 0.5f*(U0[(dimX*dimY)*k + j2*dimX+i] - U0[(dimX*dimY)*k + j1*dimX+i]);
-                gradY_sq = pow(gradY,2);
-                
-                gradZ = 0.5f*(U0[(dimX*dimY)*k2 + j*dimX+i] - U0[(dimX*dimY)*k1 + j*dimX+i]);
-                gradZ_sq = pow(gradZ,2);
-                
-                gradXX = U0[(dimX*dimY)*k + j*dimX+i2] + U0[(dimX*dimY)*k + j*dimX+i1] - 2*U0[index];
-                gradYY = U0[(dimX*dimY)*k + j2*dimX+i] + U0[(dimX*dimY)*k + j1*dimX+i] - 2*U0[index];
-                gradZZ = U0[(dimX*dimY)*k2 + j*dimX+i] + U0[(dimX*dimY)*k1 + j*dimX+i] - 2*U0[index];
-                                
-                gradXY = 0.25f*(U0[(dimX*dimY)*k + j2*dimX+i2] + U0[(dimX*dimY)*k + j1*dimX+i1] - U0[(dimX*dimY)*k + j1*dimX+i2] - U0[(dimX*dimY)*k + j2*dimX+i1]);
-                gradXZ = 0.25f*(U0[(dimX*dimY)*k2 + j*dimX+i2] - U0[(dimX*dimY)*k2+j*dimX+i1] - U0[(dimX*dimY)*k1+j*dimX+i2] + U0[(dimX*dimY)*k1+j*dimX+i1]);
-                gradYZ = 0.25f*(U0[(dimX*dimY)*k2 +j2*dimX+i] - U0[(dimX*dimY)*k2+j1*dimX+i] - U0[(dimX*dimY)*k1+j2*dimX+i] + U0[(dimX*dimY)*k1+j1*dimX+i]);
-                
-                xy_2  = 2.0f*gradX*gradY*gradXY;
-                xyz_1 = 2.0f*gradX*gradZ*gradXZ;
-                xyz_2 = 2.0f*gradY*gradZ*gradYZ;
-                
-                denom =  gradX_sq + gradY_sq + gradZ_sq;
-                
-					if (denom <= EPS) {
-					V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/EPS;
-                    V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/EPS;
-					}
-					else  {
-					V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/denom;
-                    V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/denom;
-					}
-
-                c = 1.0f/(1.0f + denom/sigma);
-                c_sq = c*c;
-                
-                W_Lapl[index] = c_sq*V_norm + c*V_orth;
-				}
-            }
-        }
-        return *W_Lapl;
-}
-
-float Diffusion_update_step3D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY, long dimZ)
-{
-	long i,j,i1,i2,j1,j2,index,k,k1,k2;
-    float gradXXc, gradYYc, gradZZc;
-
-        #pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,k,k1,k2,index,gradXXc,gradYYc,gradZZc)
-        for(i=0; i<dimX; i++) {
-			 /* symmetric boundary conditions */
-			i1 = i+1; if (i1 == dimX) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            for(j=0; j<dimY; j++) {
-				/* symmetric boundary conditions */
-				j1 = j+1; if (j1 == dimY) j1 = j-1;
-				j2 = j-1; if (j2 < 0) j2 = j+1;
-				
-				for(k=0; k<dimZ; k++) {
-				/* symmetric boundary conditions */
-				k1 = k+1; if (k1 == dimZ) k1 = k-1;
-				k2 = k-1; if (k2 < 0) k2 = k+1;
-				
-				index = (dimX*dimY)*k + j*dimX+i;
-				
-                    gradXXc = W_Lapl[(dimX*dimY)*k + j*dimX+i2] + W_Lapl[(dimX*dimY)*k + j*dimX+i1] - 2*W_Lapl[index];
-                    gradYYc = W_Lapl[(dimX*dimY)*k + j2*dimX+i] + W_Lapl[(dimX*dimY)*k + j1*dimX+i] - 2*W_Lapl[index];
-                    gradZZc = W_Lapl[(dimX*dimY)*k2 + j*dimX+i] + W_Lapl[(dimX*dimY)*k1 + j*dimX+i] - 2*W_Lapl[index];
-                    
-                    Output[index] += tau*(-lambdaPar*(gradXXc + gradYYc + gradZZc) - (Output[index] - Input[index]));
-                }
-            }
-		}
-	return *Output;
-}
diff --git a/Core/regularisers_CPU/Diffus4th_order_core.h b/Core/regularisers_CPU/Diffus4th_order_core.h
deleted file mode 100644
index d81afcb..0000000
--- a/Core/regularisers_CPU/Diffus4th_order_core.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-#include "CCPiDefines.h"
-
-/* C-OMP implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
- * The minimisation is performed using explicit scheme. 
- *
- * Input Parameters:
- * 1. Noisy image/volume 
- * 2. lambda - regularization parameter
- * 3. Edge-preserving parameter (sigma)
- * 4. Number of iterations, for explicit scheme >= 150 is recommended 
- * 5. tau - time-marching step for explicit scheme
- *
- * Output:
- * [1] Regularized image/volume 
- *
- * This function is based on the paper by
- * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
- */
- 
-#ifdef __cplusplus
-extern "C" {
-#endif
-CCPI_EXPORT float Diffus4th_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
-CCPI_EXPORT float Weighted_Laplc2D(float *W_Lapl, float *U0, float sigma, long dimX, long dimY);
-CCPI_EXPORT float Diffusion_update_step2D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY);
-CCPI_EXPORT float Weighted_Laplc3D(float *W_Lapl, float *U0, float sigma, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float Diffusion_update_step3D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY, long dimZ);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Core/regularisers_CPU/Diffusion_core.c b/Core/regularisers_CPU/Diffusion_core.c
deleted file mode 100644
index b765796..0000000
--- a/Core/regularisers_CPU/Diffusion_core.c
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Diffusion_core.h"
-#include "utils.h"
-
-#define EPS 1.0e-5
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-
-/*sign function*/
-int signNDFc(float x) {
-    return (x > 0) - (x < 0);
-}
-
-/* C-OMP implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
- * The minimisation is performed using explicit scheme. 
- *
- * Input Parameters:
- * 1. Noisy image/volume 
- * 2. lambda - regularization parameter
- * 3. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
- * 4. Number of iterations, for explicit scheme >= 150 is recommended 
- * 5. tau - time-marching step for explicit scheme
- * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
- *
- * Output:
- * [1] Regularized image/volume 
- *
- * This function is based on the paper by
- * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
- * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
- */
-
-float Diffusion_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ)
-{
-    int i;
-    float sigmaPar2;
-    sigmaPar2 = sigmaPar/sqrt(2.0f);
-    
-    /* copy into output */
-    copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
-    
-    if (dimZ == 1) {
-    /* running 2D diffusion iterations */
-    for(i=0; i < iterationsNumb; i++) {
-            if (sigmaPar == 0.0f) LinearDiff2D(Input, Output, lambdaPar, tau, (long)(dimX), (long)(dimY)); /* linear diffusion (heat equation) */
-            else NonLinearDiff2D(Input, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY)); /* nonlinear diffusion */
-		}
-	}
-	else {
-	/* running 3D diffusion iterations */
-    for(i=0; i < iterationsNumb; i++) {
-            if (sigmaPar == 0.0f) LinearDiff3D(Input, Output, lambdaPar, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
-            else NonLinearDiff3D(Input, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY), (long)(dimZ));
-		}
-	}
-    return *Output;
-}
-
-
-/********************************************************************/
-/***************************2D Functions*****************************/
-/********************************************************************/
-/* linear diffusion (heat equation) */
-float LinearDiff2D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY)
-{
-	long i,j,i1,i2,j1,j2,index;
-	float e,w,n,s,e1,w1,n1,s1;
-	
-#pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1)
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            index = j*dimX+i;
-            
-                e = Output[j*dimX+i1];
-                w = Output[j*dimX+i2];
-                n = Output[j1*dimX+i];
-                s = Output[j2*dimX+i];
-                
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-                
-                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));  
-		}}
-	return *Output;
-}
-
-/* nonlinear diffusion */
-float NonLinearDiff2D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY)
-{
-	long i,j,i1,i2,j1,j2,index;
-	float e,w,n,s,e1,w1,n1,s1;
-	
-#pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1)
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            index = j*dimX+i;
-            
-                e = Output[j*dimX+i1];
-                w = Output[j*dimX+i2];
-                n = Output[j1*dimX+i];
-                s = Output[j2*dimX+i];
-                
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-                
-            if (penaltytype == 1){
-            /* Huber penalty */
-            if (fabs(e1) > sigmaPar) e1 =  signNDFc(e1);
-            else e1 = e1/sigmaPar;
-            
-            if (fabs(w1) > sigmaPar) w1 =  signNDFc(w1);
-            else w1 = w1/sigmaPar;
-            
-            if (fabs(n1) > sigmaPar) n1 =  signNDFc(n1);
-            else n1 = n1/sigmaPar;
-            
-            if (fabs(s1) > sigmaPar) s1 =  signNDFc(s1);
-            else s1 = s1/sigmaPar;
-            }
-            else if (penaltytype == 2) {
-            /* Perona-Malik */
-            e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
-            w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
-            n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
-            s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
-            }
-            else if (penaltytype == 3) {
-            /* Tukey Biweight */
-            if (fabs(e1) <= sigmaPar) e1 =  e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
-            else e1 = 0.0f;
-            if (fabs(w1) <= sigmaPar) w1 =  w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
-            else w1 = 0.0f;
-            if (fabs(n1) <= sigmaPar) n1 =  n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
-            else n1 = 0.0f;
-            if (fabs(s1) <= sigmaPar) s1 =  s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
-            else s1 = 0.0f;
-            }
-            else {
-				printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
-				break;
-				}
-           Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));  
-		}}
-	return *Output;
-}
-/********************************************************************/
-/***************************3D Functions*****************************/
-/********************************************************************/
-/* linear diffusion (heat equation) */
-float LinearDiff3D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY, long dimZ)
-{
-	long i,j,k,i1,i2,j1,j2,k1,k2,index;
-	float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
-	
-#pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1,k,k1,k2,u1,d1,u,d)
-for(k=0; k<dimZ; k++) {
-	k1 = k+1; if (k1 == dimZ) k1 = k-1;
-    k2 = k-1; if (k2 < 0) k2 = k+1;
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            index = (dimX*dimY)*k + j*dimX+i;
-            
-                e = Output[(dimX*dimY)*k + j*dimX+i1];
-                w = Output[(dimX*dimY)*k + j*dimX+i2];
-                n = Output[(dimX*dimY)*k + j1*dimX+i];
-                s = Output[(dimX*dimY)*k + j2*dimX+i];
-                u = Output[(dimX*dimY)*k1 + j*dimX+i];
-                d = Output[(dimX*dimY)*k2 + j*dimX+i];
-                
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-                u1 = u - Output[index];
-                d1 = d - Output[index];
-                
-                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));  
-		}}}
-	return *Output;
-}
-
-float NonLinearDiff3D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY, long dimZ)
-{
-	long i,j,k,i1,i2,j1,j2,k1,k2,index;
-	float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
-	
-#pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1,k,k1,k2,u1,d1,u,d)
-for(k=0; k<dimZ; k++) {
-	k1 = k+1; if (k1 == dimZ) k1 = k-1;
-    k2 = k-1; if (k2 < 0) k2 = k+1;
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            index = (dimX*dimY)*k + j*dimX+i;
-            
-                e = Output[(dimX*dimY)*k + j*dimX+i1];
-                w = Output[(dimX*dimY)*k + j*dimX+i2];
-                n = Output[(dimX*dimY)*k + j1*dimX+i];
-                s = Output[(dimX*dimY)*k + j2*dimX+i];
-                u = Output[(dimX*dimY)*k1 + j*dimX+i];
-                d = Output[(dimX*dimY)*k2 + j*dimX+i];
-                
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-                u1 = u - Output[index];
-                d1 = d - Output[index];
-                
-             if (penaltytype == 1){
-            /* Huber penalty */
-            if (fabs(e1) > sigmaPar) e1 =  signNDFc(e1);
-            else e1 = e1/sigmaPar;
-            
-            if (fabs(w1) > sigmaPar) w1 =  signNDFc(w1);
-            else w1 = w1/sigmaPar;
-            
-            if (fabs(n1) > sigmaPar) n1 =  signNDFc(n1);
-            else n1 = n1/sigmaPar;
-            
-            if (fabs(s1) > sigmaPar) s1 =  signNDFc(s1);
-            else s1 = s1/sigmaPar;
-            
-            if (fabs(u1) > sigmaPar) u1 =  signNDFc(u1);
-            else u1 = u1/sigmaPar;
-            
-            if (fabs(d1) > sigmaPar) d1 =  signNDFc(d1);
-            else d1 = d1/sigmaPar;            
-            }
-            else if (penaltytype == 2) {
-            /* Perona-Malik */
-            e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
-            w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
-            n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
-            s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
-            u1 = (u1)/(1.0f + powf((u1/sigmaPar),2));
-            d1 = (d1)/(1.0f + powf((d1/sigmaPar),2));
-            }
-            else if (penaltytype == 3) {
-            /* Tukey Biweight */
-            if (fabs(e1) <= sigmaPar) e1 =  e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
-            else e1 = 0.0f;
-            if (fabs(w1) <= sigmaPar) w1 =  w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
-            else w1 = 0.0f;
-            if (fabs(n1) <= sigmaPar) n1 =  n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
-            else n1 = 0.0f;
-            if (fabs(s1) <= sigmaPar) s1 =  s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
-            else s1 = 0.0f;
-            if (fabs(u1) <= sigmaPar) u1 =  u1*powf((1.0f - powf((u1/sigmaPar),2)), 2);
-            else u1 = 0.0f;
-            if (fabs(d1) <= sigmaPar) d1 =  d1*powf((1.0f - powf((d1/sigmaPar),2)), 2);
-            else d1 = 0.0f;
-            }
-            else {
-				printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
-				break;
-				}
-
-                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));  
-		}}}
-	return *Output;
-}
diff --git a/Core/regularisers_CPU/Diffusion_core.h b/Core/regularisers_CPU/Diffusion_core.h
deleted file mode 100644
index cc36dad..0000000
--- a/Core/regularisers_CPU/Diffusion_core.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-#include "CCPiDefines.h"
-
-
-/* C-OMP implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
- * The minimisation is performed using explicit scheme. 
- *
- * Input Parameters:
- * 1. Noisy image/volume 
- * 2. lambda - regularization parameter
- * 3. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
- * 4. Number of iterations, for explicit scheme >= 150 is recommended 
- * 5. tau - time-marching step for explicit scheme
- * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
- *
- * Output:
- * [1] Regularized image/volume 
- *
- * This function is based on the paper by
- * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
- * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
- */
-
- 
-#ifdef __cplusplus
-extern "C" {
-#endif
-CCPI_EXPORT float Diffusion_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb,  float tau, int penaltytype, int dimX, int dimY, int dimZ);
-CCPI_EXPORT float LinearDiff2D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY);
-CCPI_EXPORT float NonLinearDiff2D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY);
-CCPI_EXPORT float LinearDiff3D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float NonLinearDiff3D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY, long dimZ);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Core/regularisers_CPU/FGP_TV_core.c b/Core/regularisers_CPU/FGP_TV_core.c
deleted file mode 100644
index 68d58b7..0000000
--- a/Core/regularisers_CPU/FGP_TV_core.c
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "FGP_TV_core.h"
-
-/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
- *
- * Input Parameters:
- * 1. Noisy image/volume 
- * 2. lambdaPar - regularization parameter 
- * 3. Number of iterations
- * 4. eplsilon: tolerance constant 
- * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
- * 6. nonneg: 'nonnegativity (0 is OFF by default) 
- * 7. print information: 0 (off) or 1 (on) 
- *
- * Output:
- * [1] Filtered/regularized image
- *
- * This function is based on the Matlab's code and paper by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- */
- 
-float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
-{
-	int ll;
-    long j, DimTotal;
-	float re, re1;
-	float tk = 1.0f;
-    float tkp1=1.0f;
-    int count = 0;
-	
-	if (dimZ <= 1) {
-		/*2D case */
-		float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
-		DimTotal = (long)(dimX*dimY);
-		
-        Output_prev = calloc(DimTotal, sizeof(float));
-        P1 = calloc(DimTotal, sizeof(float));
-        P2 = calloc(DimTotal, sizeof(float));
-        P1_prev = calloc(DimTotal, sizeof(float));
-        P2_prev = calloc(DimTotal, sizeof(float));
-        R1 = calloc(DimTotal, sizeof(float));
-        R2 = calloc(DimTotal, sizeof(float)); 
-		
-		/* begin iterations */
-        for(ll=0; ll<iterationsNumb; ll++) {
-            
-            /* computing the gradient of the objective function */
-            Obj_func2D(Input, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
-            
-            /* apply nonnegativity */
-            if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
-            
-            /*Taking a step towards minus of the gradient*/
-            Grad_func2D(P1, P2, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
-            
-            /* projection step */
-            Proj_func2D(P1, P2, methodTV, DimTotal);
-            
-            /*updating R and t*/
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_func2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
-            
-            /* check early stopping criteria */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<DimTotal; j++)
-            {
-                re += pow(Output[j] - Output_prev[j],2);
-                re1 += pow(Output[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            if (re < epsil)  count++;
-				if (count > 4) break;
-            
-            /*storing old values*/
-            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
-            copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), 1l);
-            copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), 1l);
-            tk = tkp1;
-        }
-        if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", ll);   
-		free(Output_prev); free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2);		
-	}
-	else {
-		/*3D case*/
-		float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL;		
-		DimTotal = (long)(dimX*dimY*dimZ);        
-        
-        Output_prev = calloc(DimTotal, sizeof(float));
-        P1 = calloc(DimTotal, sizeof(float));
-        P2 = calloc(DimTotal, sizeof(float));
-        P3 = calloc(DimTotal, sizeof(float));
-        P1_prev = calloc(DimTotal, sizeof(float));
-        P2_prev = calloc(DimTotal, sizeof(float));        
-        P3_prev = calloc(DimTotal, sizeof(float));        
-        R1 = calloc(DimTotal, sizeof(float));
-        R2 = calloc(DimTotal, sizeof(float)); 
-        R3 = calloc(DimTotal, sizeof(float)); 
-		
-		    /* begin iterations */
-        for(ll=0; ll<iterationsNumb; ll++) {
-            
-            /* computing the gradient of the objective function */
-            Obj_func3D(Input, Output, R1, R2, R3, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
-            
-            /* apply nonnegativity */
-            if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}  
-            
-            /*Taking a step towards minus of the gradient*/
-            Grad_func3D(P1, P2, P3, Output, R1, R2, R3, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
-            
-            /* projection step */
-            Proj_func3D(P1, P2, P3, methodTV, DimTotal);
-            
-            /*updating R and t*/
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_func3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal);
-            
-            /* calculate norm - stopping rules*/
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<DimTotal; j++)
-            {
-                re += pow(Output[j] - Output_prev[j],2);
-                re1 += pow(Output[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            /* stop if the norm residual is less than the tolerance EPS */
-            if (re < epsil)  count++;
-            if (count > 4) break;            
-                        
-            /*storing old values*/
-            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-            copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-            copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-            copyIm(P3, P3_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-            tk = tkp1;            
-        }	
-		if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", ll);   
-		free(Output_prev); free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3);
-	}
-	return *Output;
-}
-
-float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY)
-{
-    float val1, val2;
-    long i,j,index;
-#pragma omp parallel for shared(A,D,R1,R2) private(index,i,j,val1,val2)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-			index = j*dimX+i;
-            /* boundary conditions  */
-            if (i == 0) {val1 = 0.0f;} else {val1 = R1[j*dimX + (i-1)];}
-            if (j == 0) {val2 = 0.0f;} else {val2 = R2[(j-1)*dimX + i];}
-            D[index] = A[index] - lambda*(R1[index] + R2[index] - val1 - val2);
-        }}
-    return *D;
-}
-float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda,  long dimX, long dimY)
-{
-    float val1, val2, multip;
-    long i,j,index;
-    multip = (1.0f/(8.0f*lambda));
-#pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(index,i,j,val1,val2)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-			index = j*dimX+i;
-            /* boundary conditions */
-            if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[j*dimX + (i+1)];
-            if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(j+1)*dimX + i];
-            P1[index] = R1[index] + multip*val1;
-            P2[index] = R2[index] + multip*val2;
-        }}
-    return 1;
-}
-float Proj_func2D(float *P1, float *P2, int methTV, long DimTotal)
-{
-    float val1, val2, denom, sq_denom;
-    long i;
-    if (methTV == 0) {
-        /* isotropic TV*/
-#pragma omp parallel for shared(P1,P2) private(i,denom,sq_denom)
-        for(i=0; i<DimTotal; i++) {
-                denom = powf(P1[i],2) +  powf(P2[i],2);
-                if (denom > 1.0f) {
-					sq_denom = 1.0f/sqrtf(denom);
-                    P1[i] = P1[i]*sq_denom;
-                    P2[i] = P2[i]*sq_denom;
-                }
-            }
-    }
-    else {
-        /* anisotropic TV*/
-#pragma omp parallel for shared(P1,P2) private(i,val1,val2)
-        for(i=0; i<DimTotal; i++) {
-                val1 = fabs(P1[i]);
-                val2 = fabs(P2[i]);
-                if (val1 < 1.0f) {val1 = 1.0f;}
-                if (val2 < 1.0f) {val2 = 1.0f;}
-                P1[i] = P1[i]/val1;
-                P2[i] = P2[i]/val2;
-            }
-    }
-    return 1;
-}
-float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal)
-{
-    long i;
-    float multip;
-    multip = ((tk-1.0f)/tkp1);
-#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i)
-    for(i=0; i<DimTotal; i++) {       
-            R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
-            R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
-        }
-    return 1;
-}
-
-/* 3D-case related Functions */
-/*****************************************************************/
-float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ)
-{
-    float val1, val2, val3;
-    long i,j,k,index;
-#pragma omp parallel for shared(A,D,R1,R2,R3) private(index,i,j,k,val1,val2,val3)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-				index = (dimX*dimY)*k + j*dimX+i;
-                /* boundary conditions */
-                if (i == 0) {val1 = 0.0f;} else {val1 = R1[(dimX*dimY)*k + j*dimX + (i-1)];}
-                if (j == 0) {val2 = 0.0f;} else {val2 = R2[(dimX*dimY)*k + (j-1)*dimX + i];}
-                if (k == 0) {val3 = 0.0f;} else {val3 = R3[(dimX*dimY)*(k-1) + j*dimX + i];}
-                D[index] = A[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3);
-            }}}
-    return *D;
-}
-float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ)
-{
-    float val1, val2, val3, multip;
-    long i,j,k, index;
-    multip = (1.0f/(26.0f*lambda));
-#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,multip) private(index,i,j,k,val1,val2,val3)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-				index = (dimX*dimY)*k + j*dimX+i;				
-                /* boundary conditions */
-                if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[(dimX*dimY)*k + j*dimX + (i+1)];
-                if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(dimX*dimY)*k + (j+1)*dimX + i];
-                if (k == dimZ-1) val3 = 0.0f; else val3 = D[index] - D[(dimX*dimY)*(k+1) + j*dimX + i];
-                P1[index] = R1[index] + multip*val1;
-                P2[index] = R2[index] + multip*val2;
-                P3[index] = R3[index] + multip*val3;
-            }}}
-    return 1;
-}
-float Proj_func3D(float *P1, float *P2, float *P3, int methTV, long DimTotal)
-{		
-    float val1, val2, val3, denom, sq_denom;
-    long i;
-    if (methTV == 0) {
-	/* isotropic TV*/
-	#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3,sq_denom)
-    for(i=0; i<DimTotal; i++) {        
-				denom = powf(P1[i],2) + powf(P2[i],2) + powf(P3[i],2);
-                if (denom > 1.0f) {
-					sq_denom = 1.0f/sqrtf(denom);
-                    P1[i] = P1[i]*sq_denom;
-                    P2[i] = P2[i]*sq_denom;
-                    P3[i] = P3[i]*sq_denom;
-                }
-			}
-	}    
-    else {
-    /* anisotropic TV*/
-#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3)
-    for(i=0; i<DimTotal; i++) {
-                val1 = fabs(P1[i]);
-                val2 = fabs(P2[i]);
-                val3 = fabs(P3[i]);
-                if (val1 < 1.0f) {val1 = 1.0f;}
-                if (val2 < 1.0f) {val2 = 1.0f;}
-                if (val3 < 1.0f) {val3 = 1.0f;}                
-                P1[i] = P1[i]/val1;
-                P2[i] = P2[i]/val2;
-                P3[i] = P3[i]/val3;
-            }
-		}
-    return 1;
-}
-float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal)
-{
-    long i;
-    float multip;
-    multip = ((tk-1.0f)/tkp1);
-#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,multip) private(i)
-    for(i=0; i<DimTotal; i++) {
-                R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
-                R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
-                R3[i] = P3[i] + multip*(P3[i] - P3_old[i]);
-            }
-    return 1;
-}
diff --git a/Core/regularisers_CPU/FGP_TV_core.h b/Core/regularisers_CPU/FGP_TV_core.h
deleted file mode 100644
index 3418604..0000000
--- a/Core/regularisers_CPU/FGP_TV_core.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-//#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-#include "CCPiDefines.h"
-
-/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
- *
- * Input Parameters:
- * 1. Noisy image/volume 
- * 2. lambda - regularization parameter 
- * 3. Number of iterations
- * 4. eplsilon: tolerance constant 
- * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
- * 6. nonneg: 'nonnegativity (0 is OFF by default) 
- * 7. print information: 0 (off) or 1 (on) 
- *
- * Output:
- * [1] Filtered/regularized image
- *
- * This function is based on the Matlab's code and paper by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- */
- 
-#ifdef __cplusplus
-extern "C" {
-#endif
-CCPI_EXPORT float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
-
-CCPI_EXPORT float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY);
-CCPI_EXPORT float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, long dimX, long dimY);
-CCPI_EXPORT float Proj_func2D(float *P1, float *P2, int methTV, long DimTotal);
-CCPI_EXPORT float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal);
-
-CCPI_EXPORT float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float Proj_func3D(float *P1, float *P2, float *P3, int methTV, long DimTotal);
-CCPI_EXPORT float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Core/regularisers_CPU/FGP_dTV_core.c b/Core/regularisers_CPU/FGP_dTV_core.c
deleted file mode 100644
index 17b75ff..0000000
--- a/Core/regularisers_CPU/FGP_dTV_core.c
+++ /dev/null
@@ -1,441 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "FGP_dTV_core.h"
-
-/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
- * which employs structural similarity of the level sets of two images/volumes, see [1,2]
- * The current implementation updates image 1 while image 2 is being fixed.
- *
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
- * 3. lambdaPar - regularization parameter [REQUIRED]
- * 4. Number of iterations [OPTIONAL]
- * 5. eplsilon: tolerance constant [OPTIONAL]
- * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL] * 
- * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
- * 8. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL]
- * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
- *
- * Output:
- * [1] Filtered/regularized image/volume
- *
- * This function is based on the Matlab's codes and papers by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
- */
- 
-float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
-{
-	int ll;
-    long j, DimTotal;
-	float re, re1;
-	float tk = 1.0f;
-    float tkp1=1.0f;
-    int count = 0;
-	
-	if (dimZ <= 1) {
-		/*2D case */
-		float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL, *InputRef_x=NULL, *InputRef_y=NULL;
-		DimTotal = (long)(dimX*dimY);
-		
-        Output_prev = calloc(DimTotal, sizeof(float));
-        P1 = calloc(DimTotal, sizeof(float));
-        P2 = calloc(DimTotal, sizeof(float));
-        P1_prev = calloc(DimTotal, sizeof(float));
-        P2_prev = calloc(DimTotal, sizeof(float));
-        R1 = calloc(DimTotal, sizeof(float));
-        R2 = calloc(DimTotal, sizeof(float)); 
-        InputRef_x = calloc(DimTotal, sizeof(float)); 
-        InputRef_y = calloc(DimTotal, sizeof(float)); 
-
-		/* calculate gradient field (smoothed) for the reference image */
-		GradNorm_func2D(InputRef, InputRef_x, InputRef_y, eta, (long)(dimX), (long)(dimY));
-		
-		/* begin iterations */
-        for(ll=0; ll<iterationsNumb; ll++) {
-            
-            /*projects a 2D vector field R-1,2 onto the orthogonal complement of another 2D vector field InputRef_xy*/                    
-            ProjectVect_func2D(R1, R2, InputRef_x, InputRef_y, (long)(dimX), (long)(dimY));
-            
-            /* computing the gradient of the objective function */
-            Obj_dfunc2D(Input, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
-            
-            /* apply nonnegativity */
-            if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
-            
-            /*Taking a step towards minus of the gradient*/
-            Grad_dfunc2D(P1, P2, Output, R1, R2, InputRef_x, InputRef_y, lambdaPar, (long)(dimX), (long)(dimY));
-            
-            /* projection step */
-            Proj_dfunc2D(P1, P2, methodTV, DimTotal);
-            
-            /*updating R and t*/
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_dfunc2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
-            
-            /* check early stopping criteria */
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<DimTotal; j++)
-            {
-                re += pow(Output[j] - Output_prev[j],2);
-                re1 += pow(Output[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            if (re < epsil)  count++;
-				if (count > 4) break;
-            
-            /*storing old values*/
-            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
-            copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), 1l);
-            copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), 1l);
-            tk = tkp1;
-        }
-        if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", ll);   
-		free(Output_prev); free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2); free(InputRef_x); free(InputRef_y);
-	}
-	else {
-		/*3D case*/
-		float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *InputRef_z=NULL; 
-		DimTotal = (long)(dimX*dimY*dimZ);
-        
-        Output_prev = calloc(DimTotal, sizeof(float));
-        P1 = calloc(DimTotal, sizeof(float));
-        P2 = calloc(DimTotal, sizeof(float));
-        P3 = calloc(DimTotal, sizeof(float));
-        P1_prev = calloc(DimTotal, sizeof(float));
-        P2_prev = calloc(DimTotal, sizeof(float));
-        P3_prev = calloc(DimTotal, sizeof(float));
-        R1 = calloc(DimTotal, sizeof(float));
-        R2 = calloc(DimTotal, sizeof(float)); 
-        R3 = calloc(DimTotal, sizeof(float)); 
-        InputRef_x = calloc(DimTotal, sizeof(float)); 
-        InputRef_y = calloc(DimTotal, sizeof(float)); 
-        InputRef_z = calloc(DimTotal, sizeof(float)); 
-
-		/* calculate gradient field (smoothed) for the reference volume */
-		GradNorm_func3D(InputRef, InputRef_x, InputRef_y, InputRef_z, eta, (long)(dimX), (long)(dimY), (long)(dimZ));
-		
-		/* begin iterations */
-        for(ll=0; ll<iterationsNumb; ll++) {
-
-			 /*projects a 3D vector field R-1,2,3 onto the orthogonal complement of another 3D vector field InputRef_xyz*/
-            ProjectVect_func3D(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, (long)(dimX), (long)(dimY), (long)(dimZ));
-            
-            /* computing the gradient of the objective function */
-            Obj_dfunc3D(Input, Output, R1, R2, R3, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
-            
-            /* apply nonnegativity */
-            if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}  
-            
-            /*Taking a step towards minus of the gradient*/
-            Grad_dfunc3D(P1, P2, P3, Output, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
-            
-            /* projection step */
-            Proj_dfunc3D(P1, P2, P3, methodTV, DimTotal);
-            
-            /*updating R and t*/
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_dfunc3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal);
-            
-            /* calculate norm - stopping rules*/
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<DimTotal; j++)
-            {
-                re += pow(Output[j] - Output_prev[j],2);
-                re1 += pow(Output[j],2);
-            }
-            re = sqrt(re)/sqrt(re1);
-            /* stop if the norm residual is less than the tolerance EPS */
-            if (re < epsil)  count++;
-            if (count > 4) break;            
-                        
-            /*storing old values*/
-            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-            copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-            copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-            copyIm(P3, P3_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-            tk = tkp1;            
-        }	
-		if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", ll);   
-		free(Output_prev); free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3); free(InputRef_x); free(InputRef_y); free(InputRef_z);
-	}
-	return *Output;
-}
-
-
-/********************************************************************/
-/***************************2D Functions*****************************/
-/********************************************************************/
-
-float GradNorm_func2D(float *B, float *B_x, float *B_y, float eta, long dimX, long dimY)
-{
-    long i,j,index;
-    float val1, val2, gradX, gradY, magn;
-#pragma omp parallel for shared(B, B_x, B_y) private(i,j,index,val1,val2,gradX,gradY,magn)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-			index = j*dimX+i;
-            /* zero boundary conditions */
-            if (i == dimX-1) {val1 = 0.0f;} else {val1 = B[j*dimX + (i+1)];}
-            if (j == dimY-1) {val2 = 0.0f;} else {val2 = B[(j+1)*dimX + i];}
-            gradX = val1 - B[index];
-            gradY = val2 - B[index];
-            magn = pow(gradX,2) + pow(gradY,2);
-            magn = sqrt(magn + pow(eta,2)); /* the eta-smoothed gradients magnitude */
-            B_x[index] = gradX/magn;
-            B_y[index] = gradY/magn;
-        }}
-    return 1;
-}
-
-float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, long dimX, long dimY)
-{
-    long i,j,index;
-    float in_prod;
-#pragma omp parallel for shared(R1, R2, B_x, B_y) private(index,i,j,in_prod)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-			index = j*dimX+i;
-            in_prod = R1[index]*B_x[index] + R2[index]*B_y[index];   /* calculate inner product */
-            R1[index] = R1[index] - in_prod*B_x[index];
-            R2[index] = R2[index] - in_prod*B_y[index];
-        }}
-    return 1;
-}
-
-float Obj_dfunc2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY)
-{
-    float val1, val2;
-    long i,j,index;
-#pragma omp parallel for shared(A,D,R1,R2) private(index,i,j,val1,val2)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-			index = j*dimX+i;
-            /* boundary conditions  */
-            if (i == 0) {val1 = 0.0f;} else {val1 = R1[j*dimX + (i-1)];}
-            if (j == 0) {val2 = 0.0f;} else {val2 = R2[(j-1)*dimX + i];}
-            D[index] = A[index] - lambda*(R1[index] + R2[index] - val1 - val2);
-        }}
-    return *D;
-}
-float Grad_dfunc2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, long dimX, long dimY)
-{
-    float val1, val2, multip, in_prod;
-    long i,j,index;
-    multip = (1.0f/(8.0f*lambda));
-#pragma omp parallel for shared(P1,P2,D,R1,R2,B_x,B_y,multip) private(i,j,index,val1,val2,in_prod)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-			index = j*dimX+i;
-            /* boundary conditions */
-            if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[j*dimX + (i+1)];
-            if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(j+1)*dimX + i];
-            
-            in_prod = val1*B_x[index] + val2*B_y[index];   /* calculate inner product */
-            val1 = val1 - in_prod*B_x[index];
-            val2 = val2 - in_prod*B_y[index];
-            
-            P1[index] = R1[index] + multip*val1;
-            P2[index] = R2[index] + multip*val2;
-
-        }}
-    return 1;
-}
-float Proj_dfunc2D(float *P1, float *P2, int methTV, long DimTotal)
-{
-    float val1, val2, denom, sq_denom;
-    long i;
-    if (methTV == 0) {
-        /* isotropic TV*/
-#pragma omp parallel for shared(P1,P2) private(i,denom,sq_denom)
-        for(i=0; i<DimTotal; i++) {
-                denom = powf(P1[i],2) +  powf(P2[i],2);
-                if (denom > 1.0f) {
-					sq_denom = 1.0f/sqrtf(denom);
-                    P1[i] = P1[i]*sq_denom;
-                    P2[i] = P2[i]*sq_denom;
-                }
-            }
-    }
-    else {
-        /* anisotropic TV*/
-#pragma omp parallel for shared(P1,P2) private(i,val1,val2)
-        for(i=0; i<DimTotal; i++) {
-                val1 = fabs(P1[i]);
-                val2 = fabs(P2[i]);
-                if (val1 < 1.0f) {val1 = 1.0f;}
-                if (val2 < 1.0f) {val2 = 1.0f;}
-                P1[i] = P1[i]/val1;
-                P2[i] = P2[i]/val2;
-            }
-    }
-    return 1;
-}
-float Rupd_dfunc2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal)
-{
-    long i;
-    float multip;
-    multip = ((tk-1.0f)/tkp1);
-#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,multip) private(i)
-    for(i=0; i<DimTotal; i++) {       
-            R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
-            R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
-        }
-    return 1;
-}
-
-/********************************************************************/
-/***************************3D Functions*****************************/
-/********************************************************************/
-float GradNorm_func3D(float *B, float *B_x, float *B_y, float *B_z, float eta, long dimX, long dimY, long dimZ)
-{
-    long i, j, k, index;
-    float val1, val2, val3, gradX, gradY, gradZ, magn;
-#pragma omp parallel for shared(B, B_x, B_y, B_z) private(i,j,k,index,val1,val2,val3,gradX,gradY,gradZ,magn)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-			index = (dimX*dimY)*k + j*dimX+i;
-			
-            /* zero boundary conditions */
-            if (i == dimX-1) {val1 = 0.0f;} else {val1 = B[(dimX*dimY)*k + j*dimX+(i+1)];}
-            if (j == dimY-1) {val2 = 0.0f;} else {val2 = B[(dimX*dimY)*k + (j+1)*dimX+i];}
-            if (k == dimZ-1) {val3 = 0.0f;} else {val3 = B[(dimX*dimY)*(k+1) + (j)*dimX+i];}
-            
-            gradX = val1 - B[index];
-            gradY = val2 - B[index];
-            gradZ = val3 - B[index];
-            magn = pow(gradX,2) + pow(gradY,2) + pow(gradZ,2);
-            magn = sqrt(magn + pow(eta,2)); /* the eta-smoothed gradients magnitude */
-            B_x[index] = gradX/magn;
-            B_y[index] = gradY/magn;
-            B_z[index] = gradZ/magn;
-        }}}
-    return 1;
-}
-
-float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, long dimX, long dimY, long dimZ)
-{
-    long i,j,k,index;
-    float in_prod;
-#pragma omp parallel for shared(R1, R2, R3, B_x, B_y, B_z) private(index,i,j,k,in_prod)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-			index = (dimX*dimY)*k + j*dimX+i;
-            in_prod = R1[index]*B_x[index] + R2[index]*B_y[index] + R3[index]*B_z[index];   /* calculate inner product */
-            R1[index] = R1[index] - in_prod*B_x[index];
-            R2[index] = R2[index] - in_prod*B_y[index];
-            R3[index] = R3[index] - in_prod*B_z[index];
-        }}}
-    return 1;
-}
-
-float Obj_dfunc3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ)
-{
-    float val1, val2, val3;
-    long i,j,k,index;
-#pragma omp parallel for shared(A,D,R1,R2,R3) private(index,i,j,k,val1,val2,val3)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-				index = (dimX*dimY)*k + j*dimX+i;
-                /* boundary conditions */
-                if (i == 0) {val1 = 0.0f;} else {val1 = R1[(dimX*dimY)*k + j*dimX + (i-1)];}
-                if (j == 0) {val2 = 0.0f;} else {val2 = R2[(dimX*dimY)*k + (j-1)*dimX + i];}
-                if (k == 0) {val3 = 0.0f;} else {val3 = R3[(dimX*dimY)*(k-1) + j*dimX + i];}
-                D[index] = A[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3);
-            }}}
-    return *D;
-}
-float Grad_dfunc3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, long dimX, long dimY, long dimZ)
-{
-    float val1, val2, val3, multip, in_prod;
-    long i,j,k, index;
-    multip = (1.0f/(26.0f*lambda));
-#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,multip) private(index,i,j,k,val1,val2,val3,in_prod)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-				index = (dimX*dimY)*k + j*dimX+i;				
-                /* boundary conditions */
-                if (i == dimX-1) val1 = 0.0f; else val1 = D[index] - D[(dimX*dimY)*k + j*dimX + (i+1)];
-                if (j == dimY-1) val2 = 0.0f; else val2 = D[index] - D[(dimX*dimY)*k + (j+1)*dimX + i];
-                if (k == dimZ-1) val3 = 0.0f; else val3 = D[index] - D[(dimX*dimY)*(k+1) + j*dimX + i];
-                
-                in_prod = val1*B_x[index] + val2*B_y[index] + val3*B_z[index];   /* calculate inner product */
-                val1 = val1 - in_prod*B_x[index];
-                val2 = val2 - in_prod*B_y[index];
-                val3 = val3 - in_prod*B_z[index];
-                
-                P1[index] = R1[index] + multip*val1;
-                P2[index] = R2[index] + multip*val2;
-                P3[index] = R3[index] + multip*val3;
-            }}}
-    return 1;
-}
-float Proj_dfunc3D(float *P1, float *P2, float *P3, int methTV, long DimTotal)
-{		
-    float val1, val2, val3, denom, sq_denom;
-    long i;
-    if (methTV == 0) {
-	/* isotropic TV*/
-	#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3,sq_denom)
-    for(i=0; i<DimTotal; i++) {        
-				denom = powf(P1[i],2) + powf(P2[i],2) + powf(P3[i],2);
-                if (denom > 1.0f) {
-					sq_denom = 1.0f/sqrtf(denom);
-                    P1[i] = P1[i]*sq_denom;
-                    P2[i] = P2[i]*sq_denom;
-                    P3[i] = P3[i]*sq_denom;
-                }
-			}
-	}    
-    else {
-    /* anisotropic TV*/
-#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3)
-    for(i=0; i<DimTotal; i++) {
-                val1 = fabs(P1[i]);
-                val2 = fabs(P2[i]);
-                val3 = fabs(P3[i]);
-                if (val1 < 1.0f) {val1 = 1.0f;}
-                if (val2 < 1.0f) {val2 = 1.0f;}
-                if (val3 < 1.0f) {val3 = 1.0f;}                
-                P1[i] = P1[i]/val1;
-                P2[i] = P2[i]/val2;
-                P3[i] = P3[i]/val3;
-            }
-		}
-    return 1;
-}
-float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal)
-{
-    long i;
-    float multip;
-    multip = ((tk-1.0f)/tkp1);
-#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,multip) private(i)
-    for(i=0; i<DimTotal; i++) {
-                R1[i] = P1[i] + multip*(P1[i] - P1_old[i]);
-                R2[i] = P2[i] + multip*(P2[i] - P2_old[i]);
-                R3[i] = P3[i] + multip*(P3[i] - P3_old[i]);
-            }
-    return 1;
-}
diff --git a/Core/regularisers_CPU/FGP_dTV_core.h b/Core/regularisers_CPU/FGP_dTV_core.h
deleted file mode 100644
index 442dd30..0000000
--- a/Core/regularisers_CPU/FGP_dTV_core.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-//#include <matrix.h>
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-#include "CCPiDefines.h"
-
-/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
- * which employs structural similarity of the level sets of two images/volumes, see [1,2]
- * The current implementation updates image 1 while image 2 is being fixed.
- *
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
- * 3. lambdaPar - regularization parameter [REQUIRED]
- * 4. Number of iterations [OPTIONAL]
- * 5. eplsilon: tolerance constant [OPTIONAL]
- * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL] * 
- * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
- * 8. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL]
- * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
- *
- * Output:
- * [1] Filtered/regularized image/volume
- *
- * This function is based on the Matlab's codes and papers by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
- */
- 
-#ifdef __cplusplus
-extern "C" {
-#endif
-CCPI_EXPORT float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
-
-CCPI_EXPORT float GradNorm_func2D(float *B, float *B_x, float *B_y, float eta, long dimX, long dimY);
-CCPI_EXPORT float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, long dimX, long dimY);
-CCPI_EXPORT float Obj_dfunc2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY);
-CCPI_EXPORT float Grad_dfunc2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, long dimX, long dimY);
-CCPI_EXPORT float Proj_dfunc2D(float *P1, float *P2, int methTV, long DimTotal);
-CCPI_EXPORT float Rupd_dfunc2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal);
-
-CCPI_EXPORT float GradNorm_func3D(float *B, float *B_x, float *B_y, float *B_z, float eta, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float Obj_dfunc3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float Grad_dfunc3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float Proj_dfunc3D(float *P1, float *P2, float *P3, int methTV, long DimTotal);
-CCPI_EXPORT float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Core/regularisers_CPU/LLT_ROF_core.c b/Core/regularisers_CPU/LLT_ROF_core.c
deleted file mode 100644
index 8416a14..0000000
--- a/Core/regularisers_CPU/LLT_ROF_core.c
+++ /dev/null
@@ -1,410 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "LLT_ROF_core.h"
-#define EPS_LLT 0.01
-#define EPS_ROF 1.0e-12
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-
-/*sign function*/
-int signLLT(float x) {
-    return (x > 0) - (x < 0);
-}
-
-/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
- * 
-* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well. 
-* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase 
-* lambdaLLT starting with smaller values. 
-*
-* Input Parameters:
-* 1. U0 - original noise image/volume
-* 2. lambdaROF - ROF-related regularisation parameter
-* 3. lambdaLLT - LLT-related regularisation parameter
-* 4. tau - time-marching step 
-* 5. iter - iterations number (for both models)
-*
-* Output:
-* Filtered/regularised image
-*
-* References: 
-* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
-* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
-*/
-
-float LLT_ROF_CPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int dimX, int dimY, int dimZ)
-{
-		long DimTotal;
-        int ll;
-		float *D1_LLT=NULL, *D2_LLT=NULL, *D3_LLT=NULL, *D1_ROF=NULL, *D2_ROF=NULL, *D3_ROF=NULL;
-		
-		DimTotal = (long)(dimX*dimY*dimZ);
-        
-        D1_ROF = calloc(DimTotal, sizeof(float));
-        D2_ROF = calloc(DimTotal, sizeof(float));
-        D3_ROF = calloc(DimTotal, sizeof(float));
-        
-        D1_LLT = calloc(DimTotal, sizeof(float));
-        D2_LLT = calloc(DimTotal, sizeof(float));
-        D3_LLT = calloc(DimTotal, sizeof(float));
-        
-        copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ)); /* initialize  */
-       
-		for(ll = 0; ll < iterationsNumb; ll++) {            
-            if (dimZ == 1) {
-			/* 2D case */
-			/****************ROF******************/
-			 /* calculate first-order differences */
-            D1_func_ROF(Output, D1_ROF, (long)(dimX), (long)(dimY), 1l);
-            D2_func_ROF(Output, D2_ROF, (long)(dimX), (long)(dimY), 1l);
-            /****************LLT******************/
-            /* estimate second-order derrivatives */
-            der2D_LLT(Output, D1_LLT, D2_LLT, (long)(dimX), (long)(dimY), 1l);
-            /* Joint update for ROF and LLT models */
-            Update2D_LLT_ROF(Input, Output, D1_LLT, D2_LLT, D1_ROF, D2_ROF, lambdaROF, lambdaLLT, tau, (long)(dimX), (long)(dimY), 1l);
-            }
-            else {
-			/* 3D case */
-			/* calculate first-order differences */
-            D1_func_ROF(Output, D1_ROF, (long)(dimX), (long)(dimY), (long)(dimZ));
-            D2_func_ROF(Output, D2_ROF, (long)(dimX), (long)(dimY), (long)(dimZ));
-            D3_func_ROF(Output, D3_ROF, (long)(dimX), (long)(dimY), (long)(dimZ)); 
-            /****************LLT******************/
-            /* estimate second-order derrivatives */
-            der3D_LLT(Output, D1_LLT, D2_LLT, D3_LLT,(long)(dimX), (long)(dimY), (long)(dimZ));
-            /* Joint update for ROF and LLT models */
-            Update3D_LLT_ROF(Input, Output, D1_LLT, D2_LLT, D3_LLT, D1_ROF, D2_ROF, D3_ROF, lambdaROF, lambdaLLT, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
-			}
-        } /*end of iterations*/
-    free(D1_LLT);free(D2_LLT);free(D3_LLT);
-    free(D1_ROF);free(D2_ROF);free(D3_ROF);
-	return *Output;
-}
-
-/*************************************************************************/
-/**********************LLT-related functions *****************************/
-/*************************************************************************/
-float der2D_LLT(float *U, float *D1, float *D2, long dimX, long dimY, long dimZ)
-{
-	long i, j, index, i_p, i_m, j_m, j_p;
-	float dxx, dyy, denom_xx, denom_yy;
-#pragma omp parallel for shared(U,D1,D2) private(i, j, index, i_p, i_m, j_m, j_p, denom_xx, denom_yy, dxx, dyy)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			index = j*dimX+i;
-			/* symmetric boundary conditions (Neuman) */
-			i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-			i_m = i - 1; if (i_m < 0) i_m = i + 1;
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-
-			dxx = U[j*dimX+i_p] - 2.0f*U[index] + U[j*dimX+i_m];
-			dyy = U[j_p*dimX+i] - 2.0f*U[index] + U[j_m*dimX+i];
-
-			denom_xx = fabs(dxx) + EPS_LLT;
-			denom_yy = fabs(dyy) + EPS_LLT;
-
-			D1[index] = dxx / denom_xx;
-			D2[index] = dyy / denom_yy;
-		}
-	}
-	return 1;
-}
-
-float der3D_LLT(float *U, float *D1, float *D2, float *D3, long dimX, long dimY, long dimZ)
- {
- 	long i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, index;
- 	float dxx, dyy, dzz, denom_xx, denom_yy, denom_zz;
- #pragma omp parallel for shared(U,D1,D2,D3) private(i, j, index, k, i_p, i_m, j_m, j_p, k_p, k_m, denom_xx, denom_yy, denom_zz, dxx, dyy, dzz)
- 	for (i = 0; i<dimX; i++) {
- 		for (j = 0; j<dimY; j++) {
- 			for (k = 0; k<dimZ; k++) {
-				/* symmetric boundary conditions (Neuman) */
-				i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-				i_m = i - 1; if (i_m < 0) i_m = i + 1;
-				j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-				j_m = j - 1; if (j_m < 0) j_m = j + 1;
- 				k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
- 				k_m = k - 1; if (k_m < 0) k_m = k + 1;
-				
-				index = (dimX*dimY)*k + j*dimX+i;
- 
- 				dxx = U[(dimX*dimY)*k + j*dimX+i_p] - 2.0f*U[index] + U[(dimX*dimY)*k + j*dimX+i_m];
- 				dyy = U[(dimX*dimY)*k + j_p*dimX+i] - 2.0f*U[index] + U[(dimX*dimY)*k + j_m*dimX+i];
- 				dzz = U[(dimX*dimY)*k_p + j*dimX+i] - 2.0f*U[index] + U[(dimX*dimY)*k_m + j*dimX+i];
- 
- 				denom_xx = fabs(dxx) + EPS_LLT;
- 				denom_yy = fabs(dyy) + EPS_LLT;
- 				denom_zz = fabs(dzz) + EPS_LLT;
- 
- 				D1[index] = dxx / denom_xx;
- 				D2[index] = dyy / denom_yy;
- 				D3[index] = dzz / denom_zz;
- 			}
- 		}
- 	}
- 	return 1;
- }
-
-/*************************************************************************/
-/**********************ROF-related functions *****************************/
-/*************************************************************************/
-
-/* calculate differences 1 */
-float D1_func_ROF(float *A, float *D1, long dimX, long dimY, long dimZ)
-{
-    float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
-    long i,j,k,i1,i2,k1,j1,j2,k2,index;
-    
-    if (dimZ > 1) {
-#pragma omp parallel for shared (A, D1, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1,NOMy_1,NOMy_0,NOMz_1,NOMz_0,denom1,denom2,denom3,T1)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-                for(k=0; k<dimZ; k++) {
-					index = (dimX*dimY)*k + j*dimX+i;
-                    /* symmetric boundary conditions (Neuman) */
-                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                    i2 = i - 1; if (i2 < 0) i2 = i+1;
-                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                    j2 = j - 1; if (j2 < 0) j2 = j+1;
-                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                    k2 = k - 1; if (k2 < 0) k2 = k+1;
-                    
-                    /* Forward-backward differences */
-                    NOMx_1 = A[(dimX*dimY)*k + j1*dimX + i] - A[index]; /* x+ */
-                    NOMy_1 = A[(dimX*dimY)*k + j*dimX + i1] - A[index]; /* y+ */
-                    /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */  /* x- */
-                    NOMy_0 = A[index] - A[(dimX*dimY)*k + j*dimX + i2]; /* y- */
-                    
-                    NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
-                    NOMz_0 = A[index] - A[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
-                    
-                    
-                    denom1 = NOMx_1*NOMx_1;
-                    denom2 = 0.5f*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
-                    denom2 = denom2*denom2;
-                    denom3 = 0.5f*(signLLT(NOMz_1) + signLLT(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0)));
-                    denom3 = denom3*denom3;
-                    T1 = sqrt(denom1 + denom2 + denom3 + EPS_ROF);
-                    D1[index] = NOMx_1/T1;
-                }}}
-    }
-    else {
-#pragma omp parallel for shared (A, D1, dimX, dimY) private(i, j, i1, j1, i2, j2,NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1,index)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-				index = j*dimX+i;
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                i2 = i - 1; if (i2 < 0) i2 = i+1;
-                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                j2 = j - 1; if (j2 < 0) j2 = j+1;
-                
-                /* Forward-backward differences */
-                NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
-                NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
-                /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */ /* x- */
-                NOMy_0 = A[index] - A[(j)*dimX + i2]; /* y- */
-                
-                denom1 = NOMx_1*NOMx_1;
-                denom2 = 0.5f*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
-                denom2 = denom2*denom2;
-                T1 = sqrtf(denom1 + denom2 + EPS_ROF);
-                D1[index] = NOMx_1/T1;
-            }}
-    }
-    return *D1;
-}
-/* calculate differences 2 */
-float D2_func_ROF(float *A, float *D2, long dimX, long dimY, long dimZ)
-{
-    float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
-    long i,j,k,i1,i2,k1,j1,j2,k2,index;
-    
-    if (dimZ > 1) {
-#pragma omp parallel for shared (A, D2, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2,  NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-                for(k=0; k<dimZ; k++) {
-					index = (dimX*dimY)*k + j*dimX+i;
-                    /* symmetric boundary conditions (Neuman) */
-                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                    i2 = i - 1; if (i2 < 0) i2 = i+1;
-                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                    j2 = j - 1; if (j2 < 0) j2 = j+1;
-                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                    k2 = k - 1; if (k2 < 0) k2 = k+1;
-                    
-                    
-                    /* Forward-backward differences */
-                    NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
-                    NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
-                    NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
-                    NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
-                    NOMz_0 = A[index] - A[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
-                    
-                    
-                    denom1 = NOMy_1*NOMy_1;
-                    denom2 = 0.5f*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
-                    denom2 = denom2*denom2;
-                    denom3 = 0.5f*(signLLT(NOMz_1) + signLLT(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0)));
-                    denom3 = denom3*denom3;
-                    T2 = sqrtf(denom1 + denom2 + denom3 + EPS_ROF);
-                    D2[index] = NOMy_1/T2;
-                }}}
-    }
-    else {
-#pragma omp parallel for shared (A, D2, dimX, dimY) private(i, j, i1, j1, i2, j2, NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2,index)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-				index = j*dimX+i;
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                i2 = i - 1; if (i2 < 0) i2 = i+1;
-                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                j2 = j - 1; if (j2 < 0) j2 = j+1;
-                
-                /* Forward-backward differences */
-                NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
-                NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
-                NOMx_0 = A[index] - A[j2*dimX + i]; /* x- */
-                /*NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; */  /* y- */
-                
-                denom1 = NOMy_1*NOMy_1;
-                denom2 = 0.5f*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
-                denom2 = denom2*denom2;
-                T2 = sqrtf(denom1 + denom2 + EPS_ROF);
-                D2[index] = NOMy_1/T2;
-            }}
-    }
-    return *D2;
-}
-
-/* calculate differences 3 */
-float D3_func_ROF(float *A, float *D3, long dimX, long dimY, long dimZ)
-{
-    float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
-    long index,i,j,k,i1,i2,k1,j1,j2,k2;
-    
-#pragma omp parallel for shared (A, D3, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2,  NOMx_1, NOMy_1, NOMy_0, NOMx_0, NOMz_1, denom1, denom2, denom3, T3)
-    for(j=0; j<dimY; j++) {
-        for(i=0; i<dimX; i++) {
-            for(k=0; k<dimZ; k++) {
-				index = (dimX*dimY)*k + j*dimX+i;
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                i2 = i - 1; if (i2 < 0) i2 = i+1;
-                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                j2 = j - 1; if (j2 < 0) j2 = j+1;
-                k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                k2 = k - 1; if (k2 < 0) k2 = k+1;
-                
-                /* Forward-backward differences */
-                NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
-                NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
-                NOMy_0 = A[index] - A[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */
-                NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
-                NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
-                /*NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; */ /* z- */
-                
-                denom1 = NOMz_1*NOMz_1;
-                denom2 = 0.5f*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
-                denom2 = denom2*denom2;
-                denom3 = 0.5f*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
-                denom3 = denom3*denom3;
-                T3 = sqrtf(denom1 + denom2 + denom3 + EPS_ROF);
-                D3[index] = NOMz_1/T3;
-            }}}
-    return *D3;
-}
-
-/*************************************************************************/
-/**********************ROF-LLT-related functions *************************/
-/*************************************************************************/
-
-float Update2D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D1_ROF, float *D2_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ)
-{
-	long i, j, index, i_p, i_m, j_m, j_p;
-	float div, laplc, dxx, dyy, dv1, dv2;
-#pragma omp parallel for shared(U,U0) private(i, j, index, i_p, i_m, j_m, j_p, laplc, div, dxx, dyy, dv1, dv2)
-	for (i = 0; i<dimX; i++) {
-		for (j = 0; j<dimY; j++) {
-			index = j*dimX+i;
-			/* symmetric boundary conditions (Neuman) */
-			i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-			i_m = i - 1; if (i_m < 0) i_m = i + 1;
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-			
-			/*LLT-related part*/
-			dxx = D1_LLT[j*dimX+i_p] - 2.0f*D1_LLT[index] + D1_LLT[j*dimX+i_m];
-			dyy = D2_LLT[j_p*dimX+i] - 2.0f*D2_LLT[index] + D2_LLT[j_m*dimX+i];
-			laplc = dxx + dyy; /*build Laplacian*/
-			
-			/*ROF-related part*/
-			dv1 = D1_ROF[index] - D1_ROF[j_m*dimX + i];
-            dv2 = D2_ROF[index] - D2_ROF[j*dimX + i_m];
-			div = dv1 + dv2; /*build Divirgent*/
-            
-			/*combine all into one cost function to minimise */
-            U[index] += tau*(2.0f*lambdaROF*(div) - lambdaLLT*(laplc) - (U[index] - U0[index]));
-		}
-	}
-	return *U;
-}
-
-float Update3D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D3_LLT, float *D1_ROF, float *D2_ROF, float *D3_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ)
-{
-	long i, j, k, i_p, i_m, j_m, j_p, k_p, k_m, index;
-	float div, laplc, dxx, dyy, dzz, dv1, dv2, dv3;
-#pragma omp parallel for shared(U,U0) private(i, j, k, index, i_p, i_m, j_m, j_p, k_p, k_m, laplc, div, dxx, dyy, dzz, dv1, dv2, dv3)
- 	for (i = 0; i<dimX; i++) {
- 		for (j = 0; j<dimY; j++) {
- 			for (k = 0; k<dimZ; k++) {
-				/* symmetric boundary conditions (Neuman) */
-				i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-				i_m = i - 1; if (i_m < 0) i_m = i + 1;
-				j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-				j_m = j - 1; if (j_m < 0) j_m = j + 1;
- 				k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
- 				k_m = k - 1; if (k_m < 0) k_m = k + 1;
-			
-				index = (dimX*dimY)*k + j*dimX+i;
-			
-				/*LLT-related part*/
-				dxx = D1_LLT[(dimX*dimY)*k + j*dimX+i_p] - 2.0f*D1_LLT[index] + D1_LLT[(dimX*dimY)*k + j*dimX+i_m];
-				dyy = D2_LLT[(dimX*dimY)*k + j_p*dimX+i] - 2.0f*D2_LLT[index] + D2_LLT[(dimX*dimY)*k + j_m*dimX+i];
-				dzz = D3_LLT[(dimX*dimY)*k_p + j*dimX+i] - 2.0f*D3_LLT[index] + D3_LLT[(dimX*dimY)*k_m + j*dimX+i];
-				laplc = dxx + dyy + dzz; /*build Laplacian*/
-			
-				/*ROF-related part*/
-				dv1 = D1_ROF[index] - D1_ROF[(dimX*dimY)*k + j_m*dimX+i];
-				dv2 = D2_ROF[index] - D2_ROF[(dimX*dimY)*k + j*dimX+i_m];
-				dv3 = D3_ROF[index] - D3_ROF[(dimX*dimY)*k_m + j*dimX+i];
-				div = dv1 + dv2 + dv3; /*build Divirgent*/
-            
-				/*combine all into one cost function to minimise */
-				U[index] += tau*(2.0f*lambdaROF*(div) - lambdaLLT*(laplc) - (U[index] - U0[index]));
-			}
-		}
-	}
-	return *U;
-}
-
diff --git a/Core/regularisers_CPU/LLT_ROF_core.h b/Core/regularisers_CPU/LLT_ROF_core.h
deleted file mode 100644
index 8e6591e..0000000
--- a/Core/regularisers_CPU/LLT_ROF_core.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-#include "CCPiDefines.h"
-
-/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
- * 
-* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well. 
-* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase 
-* lambdaLLT starting with smaller values. 
-*
-* Input Parameters:
-* 1. U0 - original noise image/volume
-* 2. lambdaROF - ROF-related regularisation parameter
-* 3. lambdaLLT - LLT-related regularisation parameter
-* 4. tau - time-marching step 
-* 5. iter - iterations number (for both models)
-*
-* Output:
-* Filtered/regularised image
-*
-* References: 
-* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
-* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
-*/
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-CCPI_EXPORT float LLT_ROF_CPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
-
-CCPI_EXPORT float der2D_LLT(float *U, float *D1, float *D2, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float der3D_LLT(float *U, float *D1, float *D2, float *D3, long dimX, long dimY, long dimZ);
-
-CCPI_EXPORT float D1_func_ROF(float *A, float *D1, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float D2_func_ROF(float *A, float *D2, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float D3_func_ROF(float *A, float *D3, long dimX, long dimY, long dimZ);
-
-CCPI_EXPORT float Update2D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D1_ROF, float *D2_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float Update3D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D3_LLT, float *D1_ROF, float *D2_ROF, float *D3_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Core/regularisers_CPU/Nonlocal_TV_core.c b/Core/regularisers_CPU/Nonlocal_TV_core.c
deleted file mode 100644
index c4c9118..0000000
--- a/Core/regularisers_CPU/Nonlocal_TV_core.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd. 
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd. 
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Nonlocal_TV_core.h"
-
-/* C-OMP implementation of non-local regulariser
- * Weights and associated indices must be given as an input.
- * Gauss-Seidel fixed point iteration requires ~ 3 iterations, so the main effort
- * goes in pre-calculation of weights and selection of patches
- *
- *
- * Input Parameters:
- * 1. 2D/3D grayscale image/volume
- * 2. AR_i - indeces of i neighbours
- * 3. AR_j - indeces of j neighbours
- * 4. AR_k - indeces of k neighbours (0 - for 2D case)
- * 5. Weights_ij(k) - associated weights 
- * 6. regularisation parameter
- * 7. iterations number 
- 
- * Output:
- * 1. denoised image/volume 	
- * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
- 
- */
-/*****************************************************************************/
-
-float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb)
-{
-
-    long i, j, k;
-    int iter;
-    lambdaReg = 1.0f/lambdaReg;
-         
-    /*****2D INPUT *****/
-    if (dimZ == 0) {
-	  copyIm(A_orig, Output, (long)(dimX), (long)(dimY), 1l);
-    /* for each pixel store indeces of the most similar neighbours (patches) */
-     for(iter=0; iter<IterNumb; iter++) {    
-#pragma omp parallel for shared (A_orig, Output, Weights, H_i, H_j, iter) private(i,j)
-      for(i=0; i<(long)(dimX); i++) {
-            for(j=0; j<(long)(dimY); j++) {              
-             /*NLM_H1_2D(Output, A_orig, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg);*/  /* NLM - H1 penalty */
-             NLM_TV_2D(Output, A_orig, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg);  /* NLM - TV penalty */
-           }}
-          }
-    }  
-    else {
-     /*****3D INPUT *****/
-        copyIm(A_orig, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
-    /* for each pixel store indeces of the most similar neighbours (patches) */
-     for(iter=0; iter<IterNumb; iter++) {    
-#pragma omp parallel for shared (A_orig, Output, Weights, H_i, H_j, H_k, iter) private(i,j,k)
-      for(i=0; i<(long)(dimX); i++) {
-            for(j=0; j<(long)(dimY); j++) {              
-               for(k=0; k<(long)(dimZ); k++) {
-            /* NLM_H1_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, dimX, dimY, dimZ, NumNeighb, lambdaReg); */ /* NLM - H1 penalty */
-            NLM_TV_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, (long)(dimX), (long)(dimY), (long)(dimZ), NumNeighb, lambdaReg);   /* NLM - TV penalty */     
-           }}}          
-          }          
-    }
-    return *Output;
-}
-
-/***********<<<<Main Function for NLM - H1 penalty>>>>**********/
-float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg)
-{
-	long x, i1, j1, index, index_m; 
-	float value = 0.0f, normweight  = 0.0f;
-	
-	index_m = j*dimX+i;
-	for(x=0; x < NumNeighb; x++) {
-	index =  (dimX*dimY*x) + j*dimX+i;
-		i1 = H_i[index];
-		j1 = H_j[index];
-		value += A[j1*dimX+i1]*Weights[index];
-		normweight += Weights[index];
-	}
-	 A[index_m] = (lambdaReg*A_orig[index_m] + value)/(lambdaReg + normweight);
-    return *A;
-}
-/*3D version*/
-float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg)
-{
-	long x, i1, j1, k1, index; 
-	float value = 0.0f, normweight  = 0.0f;
-	
-	for(x=0; x < NumNeighb; x++) {
-	index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
-		i1 = H_i[index];
-		j1 = H_j[index];
-		k1 = H_k[index];
-		value += A[(dimX*dimY*k1) + j1*dimX+i1]*Weights[index];
-		normweight += Weights[index];
-	}	
-    A[(dimX*dimY*k) + j*dimX+i] = (lambdaReg*A_orig[(dimX*dimY*k) + j*dimX+i] + value)/(lambdaReg + normweight);
-    return *A;
-}
-
-
-/***********<<<<Main Function for NLM - TV penalty>>>>**********/
-float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg)
-{
-	long x, i1, j1, index, index_m; 
-	float value = 0.0f, normweight  = 0.0f, NLgrad_magn = 0.0f, NLCoeff;
-	
-	 index_m = j*dimX+i;
-		
-	for(x=0; x < NumNeighb; x++) {
-		index =  (dimX*dimY*x) + j*dimX+i; /*c*/
-		i1 = H_i[index];
-		j1 = H_j[index];
-		NLgrad_magn += powf((A[j1*dimX+i1] - A[index_m]),2)*Weights[index];
-	}
-  
-    NLgrad_magn = sqrtf(NLgrad_magn); /*Non Local Gradients Magnitude */
-    NLCoeff = 2.0f*(1.0f/(NLgrad_magn + EPS));
-    		
-    for(x=0; x < NumNeighb; x++) {
-	index =  (dimX*dimY*x) + j*dimX+i; /*c*/
-	i1 = H_i[index];
-	j1 = H_j[index];
-        value += A[j1*dimX+i1]*NLCoeff*Weights[index];
-        normweight += Weights[index]*NLCoeff;
-    }   		
-    A[index_m] = (lambdaReg*A_orig[index_m] + value)/(lambdaReg + normweight);
-    return *A;
-}
-/*3D version*/
-float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg)
-{
-	long x, i1, j1, k1, index; 
-	float value = 0.0f, normweight  = 0.0f, NLgrad_magn = 0.0f, NLCoeff;
-	
-	for(x=0; x < NumNeighb; x++) {
-	index =  dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
-		i1 = H_i[index];
-		j1 = H_j[index];
-		k1 = H_k[index];
-	        NLgrad_magn += powf((A[(dimX*dimY*k1) + j1*dimX+i1] - A[(dimX*dimY*k1) + j*dimX+i]),2)*Weights[index];
-	}
-  
-    NLgrad_magn = sqrtf(NLgrad_magn); /*Non Local Gradients Magnitude */
-    NLCoeff = 2.0f*(1.0f/(NLgrad_magn + EPS));
-    		
-    for(x=0; x < NumNeighb; x++) {
-	index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
-	i1 = H_i[index];
-	j1 = H_j[index];
-	k1 = H_k[index];
-        value += A[(dimX*dimY*k1) + j1*dimX+i1]*NLCoeff*Weights[index];
-        normweight += Weights[index]*NLCoeff;
-    }   		
-    A[(dimX*dimY*k) + j*dimX+i] = (lambdaReg*A_orig[(dimX*dimY*k) + j*dimX+i] + value)/(lambdaReg + normweight);
-    return *A;
-}
diff --git a/Core/regularisers_CPU/Nonlocal_TV_core.h b/Core/regularisers_CPU/Nonlocal_TV_core.h
deleted file mode 100644
index 6d55101..0000000
--- a/Core/regularisers_CPU/Nonlocal_TV_core.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd. 
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd. 
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-#include "CCPiDefines.h"
-
-#define EPS 1.0000e-9
-
-/* C-OMP implementation of non-local regulariser
- * Weights and associated indices must be given as an input.
- * Gauss-Seidel fixed point iteration requires ~ 3 iterations, so the main effort
- * goes in pre-calculation of weights and selection of patches
- *
- *
- * Input Parameters:
- * 1. 2D/3D grayscale image/volume
- * 2. AR_i - indeces of i neighbours
- * 3. AR_j - indeces of j neighbours
- * 4. AR_k - indeces of k neighbours (0 - for 2D case)
- * 5. Weights_ij(k) - associated weights 
- * 6. regularisation parameter
- * 7. iterations number 
- 
- * Output:
- * 1. denoised image/volume 	
- * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans.   Image Processing 17, no. 7 (2008): 1047-1060. 
- */
- 
-#ifdef __cplusplus
-extern "C" {
-#endif
-CCPI_EXPORT float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb);
-CCPI_EXPORT float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg);
-CCPI_EXPORT float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg);
-CCPI_EXPORT float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg);
-CCPI_EXPORT float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Core/regularisers_CPU/PatchSelect_core.c b/Core/regularisers_CPU/PatchSelect_core.c
deleted file mode 100644
index cf5cdc7..0000000
--- a/Core/regularisers_CPU/PatchSelect_core.c
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd. 
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd. 
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "PatchSelect_core.h"
-
-/* C-OMP implementation of non-local weight pre-calculation for non-local priors
- * Weights and associated indices are stored into pre-allocated arrays and passed
- * to the regulariser
- *
- *
- * Input Parameters:
- * 1. 2D/3D grayscale image/volume
- * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
- * 3. Similarity window (half-size of the patch window, e.g. 2)
- * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
- * 5. noise-related parameter to calculate non-local weights
- *
- * Output [2D]:
- * 1. AR_i - indeces of i neighbours
- * 2. AR_j - indeces of j neighbours
- * 3. Weights_ij - associated weights
- *
- * Output [3D]:
- * 1. AR_i - indeces of i neighbours
- * 2. AR_j - indeces of j neighbours
- * 3. AR_k - indeces of j neighbours
- * 4. Weights_ijk - associated weights
- */
-
-void swap(float *xp, float *yp) 
-{ 
-    float temp = *xp; 
-    *xp = *yp; 
-    *yp = temp; 
-} 
-
-void swapUS(unsigned short *xp, unsigned short *yp) 
-{ 
-    unsigned short temp = *xp; 
-    *xp = *yp; 
-    *yp = temp; 
-} 
-/**************************************************/
-
-float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM)
-{
-    int counterG;
-    long i, j, k;
-    float *Eucl_Vec, h2;
-    h2 = h*h;   
-    /****************2D INPUT ***************/
-    if (dimZ == 0) {
-        /* generate a 2D Gaussian kernel for NLM procedure */
-        Eucl_Vec = (float*) calloc ((2*SimilarWin+1)*(2*SimilarWin+1),sizeof(float));
-        counterG = 0;
-        for(i=-SimilarWin; i<=SimilarWin; i++) {
-            for(j=-SimilarWin; j<=SimilarWin; j++) {
-                Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2))/(2*SimilarWin*SimilarWin));
-                counterG++;
-            }} /*main neighb loop */
-        /* for each pixel store indeces of the most similar neighbours (patches) */
-        if (switchM == 1) {
-#pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j)
-    for(i=0; i<(long)(dimX); i++) {
-          for(j=0; j<(long)(dimY); j++) {
-                Indeces2D_p(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
-            }}
-        }
-        else {
-#pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j)
-    for(i=0; i<(long)(dimX); i++) {
-          for(j=0; j<(long)(dimY); j++) {
-                Indeces2D(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
-            }}
-            }
-    }
-    else {
-    /****************3D INPUT ***************/       
-        /* generate a 3D Gaussian kernel for NLM procedure */
-        Eucl_Vec = (float*) calloc ((2*SimilarWin+1)*(2*SimilarWin+1)*(2*SimilarWin+1),sizeof(float));
-        counterG = 0;
-        for(i=-SimilarWin; i<=SimilarWin; i++) {
-            for(j=-SimilarWin; j<=SimilarWin; j++) {
-                for(k=-SimilarWin; k<=SimilarWin; k++) {
-                    Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2) + pow(((float) k), 2))/(2*SimilarWin*SimilarWin*SimilarWin));
-                    counterG++;
-                }}} /*main neighb loop */     
-        
-        /* for each voxel store indeces of the most similar neighbours (patches) */
-        if (switchM == 1) {
-#pragma omp parallel for shared (A, Weights, H_i, H_j, H_k) private(i,j,k)
-        for(i=0; i<dimX; i++) {
-            for(j=0; j<dimY; j++) {
-                for(k=0; k<dimZ; k++) {
-                    Indeces3D(A, H_i, H_j, H_k, Weights, j, i, (k), (dimX), (dimY), (dimZ), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
-                }}}
-        }
-        else {
-#pragma omp parallel for shared (A, Weights, H_i, H_j, H_k) private(i,j,k)
-        for(i=0; i<dimX; i++) {
-            for(j=0; j<dimY; j++) {
-                for(k=0; k<dimZ; k++) {
-                    Indeces3D(A, H_i, H_j, H_k, Weights, (i), (j), (k), (dimX), (dimY), (dimZ), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
-                }}}
-            }
-    }
-    free(Eucl_Vec);
-    return 1;
-}
-
-float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
-{
-    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG;
-    float *Weight_Vec, normsum;
-    unsigned short *ind_i, *ind_j;
-    
-    sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1);
-    
-    Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
-    ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-    ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-    
-    counter = 0;
-    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
-        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
-            i1 = i+i_m;
-            j1 = j+j_m;
-            if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY))) {
-                normsum = 0.0f; counterG = 0;
-                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
-                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
-                        i2 = i1 + i_c;
-                        j2 = j1 + j_c;
-                        i3 = i + i_c;
-                        j3 = j + j_c;
-                        if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY))) {
-                            if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY))) {
-                                normsum += Eucl_Vec[counterG]*pow(Aorig[j3*dimX + (i3)] - Aorig[j2*dimX + (i2)], 2);
-                                counterG++;
-                            }}
-                        
-                    }}
-                /* writing temporarily into vectors */
-                if (normsum > EPS) {                    
-                    Weight_Vec[counter] = expf(-normsum/h2);
-                    ind_i[counter] = i1;
-                    ind_j[counter] = j1;                    
-                    counter++;
-                }
-            }
-        }}     
-    /* do sorting to choose the most prominent weights [HIGH to LOW] */
-    /* and re-arrange indeces accordingly */
-    for (x = 0; x < counter-1; x++)  {
-       for (y = 0; y < counter-x-1; y++)  {
-           if (Weight_Vec[y] < Weight_Vec[y+1]) {
-            swap(&Weight_Vec[y], &Weight_Vec[y+1]); 		
-            swapUS(&ind_i[y], &ind_i[y+1]);
-            swapUS(&ind_j[y], &ind_j[y+1]);  
-            }
-    	}
-    }
-     /*sorting loop finished*/      
-    /*now select the NumNeighb more prominent weights and store into pre-allocated arrays */ 
-    for(x=0; x < NumNeighb; x++) {
-        index = (dimX*dimY*x) + j*dimX+i;        
-        H_i[index] = ind_i[x];
-        H_j[index] = ind_j[x];
-        Weights[index] = Weight_Vec[x];
-    }    
-    free(ind_i);
-    free(ind_j);
-    free(Weight_Vec);
-    return 1;
-}
-float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
-{
-    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG;
-    float *Weight_Vec, normsum;
-    unsigned short *ind_i, *ind_j;
-    
-    sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1);
-    
-    Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
-    ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-    ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-    
-    counter = 0;
-    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
-        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
-            i1 = i+i_m;
-            j1 = j+j_m;
-            if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY))) {
-                normsum = 0.0f; counterG = 0;
-                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
-                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
-                        i2 = i1 + i_c;
-                        j2 = j1 + j_c;
-                        i3 = i + i_c;
-                        j3 = j + j_c;
-                        if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY))) {
-                            if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY))) {
-                                //normsum += Eucl_Vec[counterG]*pow(Aorig[j3*dimX + (i3)] - Aorig[j2*dimX + (i2)], 2);
-                                normsum += Eucl_Vec[counterG]*pow(Aorig[i3*dimY + (j3)] - Aorig[i2*dimY + (j2)], 2);
-                                counterG++;
-                            }}
-                        
-                    }}
-                /* writing temporarily into vectors */
-                if (normsum > EPS) {
-                    Weight_Vec[counter] = expf(-normsum/h2);
-                    ind_i[counter] = i1;
-                    ind_j[counter] = j1;
-                    counter++;
-                }
-            }
-        }}
-       /* do sorting to choose the most prominent weights [HIGH to LOW] */
-    /* and re-arrange indeces accordingly */
-    for (x = 0; x < counter-1; x++)  {
-       for (y = 0; y < counter-x-1; y++)  {
-           if (Weight_Vec[y] < Weight_Vec[y+1]) {
-            swap(&Weight_Vec[y], &Weight_Vec[y+1]); 		
-            swapUS(&ind_i[y], &ind_i[y+1]);
-            swapUS(&ind_j[y], &ind_j[y+1]);  
-            }
-    	}
-    }
-    /*sorting loop finished*/
-    
-    /*now select the NumNeighb more prominent weights and store into pre-allocated arrays */ 
-    for(x=0; x < NumNeighb; x++) {
-        index = (dimX*dimY*x) + i*dimY+j;       
-        H_i[index] = ind_i[x];
-        H_j[index] = ind_j[x];
-        Weights[index] = Weight_Vec[x];
-    }   
-    free(ind_i);
-    free(ind_j);
-    free(Weight_Vec);
-    return 1;
-}
-
-float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimY, long dimX, long dimZ, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
-{
-    long i1, j1, k1, i_m, j_m, k_m, i_c, j_c, k_c, i2, j2, k2, i3, j3, k3, counter, x, y, index, sizeWin_tot, counterG;
-    float *Weight_Vec, normsum, temp;
-    unsigned short *ind_i, *ind_j, *ind_k, temp_i, temp_j, temp_k;
-    
-    sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1)*(2*SearchWindow + 1);
-    
-    Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
-    ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-    ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-    ind_k = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
-    
-    counter = 0l;
-    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
-        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
-            for(k_m=-SearchWindow; k_m<=SearchWindow; k_m++) {
-                k1 = k+k_m;
-                i1 = i+i_m;
-                j1 = j+j_m;
-                if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY)) && ((k1 >= 0) && (k1 < dimZ))) {
-                    normsum = 0.0f; counterG = 0l;
-                    for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
-                        for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
-                            for(k_c=-SimilarWin; k_c<=SimilarWin; k_c++) {
-                                i2 = i1 + i_c;
-                                j2 = j1 + j_c;
-                                k2 = k1 + k_c;
-                                i3 = i + i_c;
-                                j3 = j + j_c;
-                                k3 = k + k_c;
-                                if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY)) && ((k2 >= 0) && (k2 < dimZ))) {
-                                    if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY)) && ((k3 >= 0) && (k3 < dimZ))) {
-                                        normsum += Eucl_Vec[counterG]*pow(Aorig[(dimX*dimY*k3) + j3*dimX + (i3)] - Aorig[(dimX*dimY*k2) + j2*dimX + (i2)], 2);
-                                        counterG++;
-                                    }}
-                            }}}
-                    /* writing temporarily into vectors */
-                    if (normsum > EPS) {
-                        Weight_Vec[counter] = expf(-normsum/h2);
-                        ind_i[counter] = i1;
-                        ind_j[counter] = j1;
-                        ind_k[counter] = k1;
-                        counter ++;
-                    }
-                }
-            }}}
-    /* do sorting to choose the most prominent weights [HIGH to LOW] */
-    /* and re-arrange indeces accordingly */
-    for (x = 0; x < counter; x++)  {
-        for (y = 0; y < counter; y++)  {
-            if (Weight_Vec[y] < Weight_Vec[x]) {
-                temp = Weight_Vec[y+1];
-                temp_i = ind_i[y+1];
-                temp_j = ind_j[y+1];
-                temp_k = ind_k[y+1];
-                Weight_Vec[y+1] = Weight_Vec[y];
-                Weight_Vec[y] = temp;
-                ind_i[y+1] = ind_i[y];
-                ind_i[y] = temp_i;
-                ind_j[y+1] = ind_j[y];
-                ind_j[y] = temp_j;
-                ind_k[y+1] = ind_k[y];
-                ind_k[y] = temp_k;
-            }}}
-    /*sorting loop finished*/
-    
-    /*now select the NumNeighb more prominent weights and store into arrays */
-    for(x=0; x < NumNeighb; x++) {
-        index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
-        
-        H_i[index] = ind_i[x];
-        H_j[index] = ind_j[x];
-        H_k[index] = ind_k[x];
-        
-        Weights[index] = Weight_Vec[x];
-    }
-    
-    free(ind_i);
-    free(ind_j);
-    free(ind_k);
-    free(Weight_Vec);
-    return 1;
-}
-
diff --git a/Core/regularisers_CPU/PatchSelect_core.h b/Core/regularisers_CPU/PatchSelect_core.h
deleted file mode 100644
index ddaa428..0000000
--- a/Core/regularisers_CPU/PatchSelect_core.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd. 
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd. 
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-#include "CCPiDefines.h"
-#define EPS 1.0000e-12
-
-/* C-OMP implementation of non-local weight pre-calculation for non-local priors
- * Weights and associated indices are stored into pre-allocated arrays and passed
- * to the regulariser
- *
- *
- * Input Parameters:
- * 1. 2D/3D grayscale image/volume
- * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
- * 3. Similarity window (half-size of the patch window, e.g. 2)
- * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
- * 5. noise-related parameter to calculate non-local weights
- *
- * Output [2D]:
- * 1. AR_i - indeces of i neighbours
- * 2. AR_j - indeces of j neighbours
- * 3. Weights_ij - associated weights
- *
- * Output [3D]:
- * 1. AR_i - indeces of i neighbours
- * 2. AR_j - indeces of j neighbours
- * 3. AR_k - indeces of j neighbours
- * 4. Weights_ijk - associated weights
- */
-/*****************************************************************************/
-#ifdef __cplusplus
-extern "C" {
-#endif
-CCPI_EXPORT float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM);
-CCPI_EXPORT float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2);
-CCPI_EXPORT float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2);
-CCPI_EXPORT float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimY, long dimX, long dimZ, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Core/regularisers_CPU/ROF_TV_core.c b/Core/regularisers_CPU/ROF_TV_core.c
deleted file mode 100644
index 1858442..0000000
--- a/Core/regularisers_CPU/ROF_TV_core.c
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ROF_TV_core.h"
-
-#define EPS 1.0e-12
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-
-/*sign function*/
-int sign(float x) {
-    return (x > 0) - (x < 0);
-}
-
-
-/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
- *
- * 
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
- * 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
- * 4. Number of iterations, for explicit scheme >= 150 is recommended  [REQUIRED]
- *
- * Output:
- * [1] Regularized image/volume 
- *
- * This function is based on the paper by
- * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
- */
-
-/* Running iterations of TV-ROF function */
-float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ)
-{
-    float *D1, *D2, *D3;
-    int i; 
-    long DimTotal;
-    DimTotal = (long)(dimX*dimY*dimZ);    
-    
-    D1 = calloc(DimTotal, sizeof(float));
-    D2 = calloc(DimTotal, sizeof(float));
-    D3 = calloc(DimTotal, sizeof(float));
-	   
-    /* copy into output */
-    copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
-        
-    /* start TV iterations */
-    for(i=0; i < iterationsNumb; i++) {            
-            /* calculate differences */
-            D1_func(Output, D1, (long)(dimX), (long)(dimY), (long)(dimZ));
-            D2_func(Output, D2, (long)(dimX), (long)(dimY), (long)(dimZ));
-            if (dimZ > 1) D3_func(Output, D3, (long)(dimX), (long)(dimY), (long)(dimZ)); 
-            TV_kernel(D1, D2, D3, Output, Input, lambdaPar, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
-		}           
-    free(D1);free(D2); free(D3);
-    return *Output;
-}
-
-/* calculate differences 1 */
-float D1_func(float *A, float *D1, long dimX, long dimY, long dimZ)
-{
-    float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
-    long i,j,k,i1,i2,k1,j1,j2,k2,index;
-    
-    if (dimZ > 1) {
-#pragma omp parallel for shared (A, D1, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1,NOMy_1,NOMy_0,NOMz_1,NOMz_0,denom1,denom2,denom3,T1)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-                for(k=0; k<dimZ; k++) {
-					index = (dimX*dimY)*k + j*dimX+i;
-                    /* symmetric boundary conditions (Neuman) */
-                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                    i2 = i - 1; if (i2 < 0) i2 = i+1;
-                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                    j2 = j - 1; if (j2 < 0) j2 = j+1;
-                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                    k2 = k - 1; if (k2 < 0) k2 = k+1;                    
-                    
-                    /* Forward-backward differences */
-                    NOMx_1 = A[(dimX*dimY)*k + j1*dimX + i] - A[index]; /* x+ */
-                    NOMy_1 = A[(dimX*dimY)*k + j*dimX + i1] - A[index]; /* y+ */
-                    /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */  /* x- */
-                    NOMy_0 = A[index] - A[(dimX*dimY)*k + j*dimX + i2]; /* y- */
-                    
-                    NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
-                    NOMz_0 = A[index] - A[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
-                    
-                    
-                    denom1 = NOMx_1*NOMx_1;
-                    denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
-                    denom2 = denom2*denom2;
-                    denom3 = 0.5f*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0)));
-                    denom3 = denom3*denom3;
-                    T1 = sqrt(denom1 + denom2 + denom3 + EPS);
-                    D1[index] = NOMx_1/T1;
-                }}}
-    }
-    else {
-#pragma omp parallel for shared (A, D1, dimX, dimY) private(i, j, i1, j1, i2, j2,NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1,index)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-				index = j*dimX+i;
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                i2 = i - 1; if (i2 < 0) i2 = i+1;
-                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                j2 = j - 1; if (j2 < 0) j2 = j+1;
-                
-                /* Forward-backward differences */
-                NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
-                NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
-                /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */ /* x- */
-                NOMy_0 = A[index] - A[(j)*dimX + i2]; /* y- */
-                
-                denom1 = NOMx_1*NOMx_1;
-                denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
-                denom2 = denom2*denom2;
-                T1 = sqrtf(denom1 + denom2 + EPS);
-                D1[index] = NOMx_1/T1;
-            }}
-    }
-    return *D1;
-}
-/* calculate differences 2 */
-float D2_func(float *A, float *D2, long dimX, long dimY, long dimZ)
-{
-    float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
-    long i,j,k,i1,i2,k1,j1,j2,k2,index;
-    
-    if (dimZ > 1) {
-#pragma omp parallel for shared (A, D2, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2,  NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-                for(k=0; k<dimZ; k++) {
-                    index = (dimX*dimY)*k + j*dimX+i;
-                    /* symmetric boundary conditions (Neuman) */
-                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                    i2 = i - 1; if (i2 < 0) i2 = i+1;
-                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                    j2 = j - 1; if (j2 < 0) j2 = j+1;
-                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                    k2 = k - 1; if (k2 < 0) k2 = k+1;                    
-                    
-                    /* Forward-backward differences */
-                    NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
-                    NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
-                    NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
-                    NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
-                    NOMz_0 = A[index] - A[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
-                    
-                    
-                    denom1 = NOMy_1*NOMy_1;
-                    denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
-                    denom2 = denom2*denom2;
-                    denom3 = 0.5f*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0)));
-                    denom3 = denom3*denom3;
-                    T2 = sqrtf(denom1 + denom2 + denom3 + EPS);
-                    D2[index] = NOMy_1/T2;
-                }}}
-    }
-    else {
-#pragma omp parallel for shared (A, D2, dimX, dimY) private(i, j, i1, j1, i2, j2, NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2,index)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-		index = j*dimX+i;
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                i2 = i - 1; if (i2 < 0) i2 = i+1;
-                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                j2 = j - 1; if (j2 < 0) j2 = j+1;
-                
-                /* Forward-backward differences */
-                NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
-                NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
-                NOMx_0 = A[index] - A[j2*dimX + i]; /* x- */
-                /*NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; */  /* y- */
-                
-                denom1 = NOMy_1*NOMy_1;
-                denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
-                denom2 = denom2*denom2;
-                T2 = sqrtf(denom1 + denom2 + EPS);
-                D2[index] = NOMy_1/T2;
-            }}
-    }
-    return *D2;
-}
-
-/* calculate differences 3 */
-float D3_func(float *A, float *D3, long dimX, long dimY, long dimZ)
-{
-    float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
-    long index,i,j,k,i1,i2,k1,j1,j2,k2;
-    
-#pragma omp parallel for shared (A, D3, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2,  NOMx_1, NOMy_1, NOMy_0, NOMx_0, NOMz_1, denom1, denom2, denom3, T3)
-    for(j=0; j<dimY; j++) {
-        for(i=0; i<dimX; i++) {
-            for(k=0; k<dimZ; k++) {
-				index = (dimX*dimY)*k + j*dimX+i;
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                i2 = i - 1; if (i2 < 0) i2 = i+1;
-                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                j2 = j - 1; if (j2 < 0) j2 = j+1;
-                k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                k2 = k - 1; if (k2 < 0) k2 = k+1;
-                
-                /* Forward-backward differences */
-                NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
-                NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
-                NOMy_0 = A[index] - A[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */
-                NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
-                NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
-                /*NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; */ /* z- */
-                
-                denom1 = NOMz_1*NOMz_1;
-                denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
-                denom2 = denom2*denom2;
-                denom3 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
-                denom3 = denom3*denom3;
-                T3 = sqrtf(denom1 + denom2 + denom3 + EPS);
-                D3[index] = NOMz_1/T3;
-            }}}
-    return *D3;
-}
-
-/* calculate divergence */
-float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, long dimX, long dimY, long dimZ)
-{
-    float dv1, dv2, dv3;
-    long index,i,j,k,i1,i2,k1,j1,j2,k2;
-    
-    if (dimZ > 1) {
-#pragma omp parallel for shared (D1, D2, D3, B, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, dv1,dv2,dv3)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-                for(k=0; k<dimZ; k++) {
-                    index = (dimX*dimY)*k + j*dimX+i;
-                    /* symmetric boundary conditions (Neuman) */
-                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                    i2 = i - 1; if (i2 < 0) i2 = i+1;
-                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                    j2 = j - 1; if (j2 < 0) j2 = j+1;
-                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                    k2 = k - 1; if (k2 < 0) k2 = k+1;
-                    
-                    /*divergence components */
-                    dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i];
-                    dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
-                    dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
-                    
-                    B[index] += tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (B[index] - A[index]));   
-                }}}
-    }
-    else {
-#pragma omp parallel for shared (D1, D2, B, dimX, dimY) private(index, i, j, i1, j1, i2, j2,dv1,dv2)
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-                index = j*dimX+i;
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                i2 = i - 1; if (i2 < 0) i2 = i+1;
-                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                j2 = j - 1; if (j2 < 0) j2 = j+1;
-                
-                /* divergence components  */
-                dv1 = D1[index] - D1[j2*dimX + i];
-                dv2 = D2[index] - D2[j*dimX + i2];
-
-                B[index] += tau*(2.0f*lambda*(dv1 + dv2) - (B[index] - A[index]));
-            }}
-    }
-    return *B;
-}
diff --git a/Core/regularisers_CPU/ROF_TV_core.h b/Core/regularisers_CPU/ROF_TV_core.h
deleted file mode 100644
index 4e320e9..0000000
--- a/Core/regularisers_CPU/ROF_TV_core.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-#include "CCPiDefines.h"
-
-/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
- *
- * 
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
- * 3. Number of iterations, for explicit scheme >= 150 is recommended  [REQUIRED]
- * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
- *
- * Output:
- * [1] Regularized image/volume 
- *
- * This function is based on the paper by
- * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
- *
- * D. Kazantsev, 2016-18
- */
- 
-#ifdef __cplusplus
-extern "C" {
-#endif
-CCPI_EXPORT float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
-
-CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float D1_func(float *A, float *D1, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float D2_func(float *A, float *D2, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float D3_func(float *A, float *D3, long dimX, long dimY, long dimZ);
-#ifdef __cplusplus
-}
-#endif
\ No newline at end of file
diff --git a/Core/regularisers_CPU/SB_TV_core.c b/Core/regularisers_CPU/SB_TV_core.c
deleted file mode 100755
index 769ea67..0000000
--- a/Core/regularisers_CPU/SB_TV_core.c
+++ /dev/null
@@ -1,368 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "SB_TV_core.h"
-
-/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
-*
-* Input Parameters:
-* 1. Noisy image/volume
-* 2. lambda - regularisation parameter
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon - tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-* 6. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
-*
-* Output:
-* 1. Filtered/regularized image
-*
-* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
-*/
- 
-float SB_TV_CPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
-{
-	int ll;
-    long j, DimTotal;    
-	float re, re1, lambda;
-    int count = 0;
-    mu = 1.0f/mu;
-    lambda = 2.0f*mu;
-
-	if (dimZ <= 1) {
-		/* 2D case */
-		float *Output_prev=NULL, *Dx=NULL, *Dy=NULL, *Bx=NULL, *By=NULL;
-		DimTotal = (long)(dimX*dimY);
-		
-		Output_prev = calloc(DimTotal, sizeof(float));
-		Dx = calloc(DimTotal, sizeof(float));
-		Dy = calloc(DimTotal, sizeof(float));
-		Bx = calloc(DimTotal, sizeof(float));
-		By = calloc(DimTotal, sizeof(float));
-        
-        copyIm(Input, Output, (long)(dimX), (long)(dimY), 1l); /*initialize */
-        
-        /* begin outer SB iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /* storing old estimate */
-            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
-            
-            /* perform two GS iterations (normally 2 is enough for the convergence) */
-            gauss_seidel2D(Output, Input, Output_prev, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY), lambda, mu);
-            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
-            /*GS iteration */
-            gauss_seidel2D(Output, Input, Output_prev, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY), lambda, mu);
-            
-            /* TV-related step */
-            if (methodTV == 1)  updDxDy_shrinkAniso2D(Output, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY), lambda);
-            else updDxDy_shrinkIso2D(Output, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY), lambda);
-            
-            /* update for Bregman variables */
-            updBxBy2D(Output, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY));
-            
-            /* check early stopping criteria if epsilon not equal zero */
-            if (epsil != 0) {
-            re = 0.0f; re1 = 0.0f;
-				for(j=0; j<DimTotal; j++) {
-                re += pow(Output[j] - Output_prev[j],2);
-                re1 += pow(Output[j],2);
-				}
-            re = sqrt(re)/sqrt(re1);
-            if (re < epsil)  count++;
-				if (count > 4) break;
-			}
-            /*printf("%f %i %i \n", re, ll, count); */
-        }
-        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);
-		free(Output_prev); free(Dx); free(Dy); free(Bx); free(By);
-	}
-	else {
-		/* 3D case */
-		float *Output_prev=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL;
-		DimTotal = (long)(dimX*dimY*dimZ);
-		
-		Output_prev = calloc(DimTotal, sizeof(float));
-		Dx = calloc(DimTotal, sizeof(float));
-		Dy = calloc(DimTotal, sizeof(float));
-		Dz = calloc(DimTotal, sizeof(float));
-		Bx = calloc(DimTotal, sizeof(float));
-		By = calloc(DimTotal, sizeof(float));
-		Bz = calloc(DimTotal, sizeof(float));
-        
-        copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ)); /*initialize */
-        
-        /* begin outer SB iterations */
-        for(ll=0; ll<iter; ll++) {
-            
-            /* storing old estimate */
-            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-            
-             /* perform two GS iterations (normally 2 is enough for the convergence) */
-            gauss_seidel3D(Output, Input, Output_prev, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ), lambda, mu);
-            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
-            /*GS iteration */
-            gauss_seidel3D(Output, Input, Output_prev, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ), lambda, mu);
-            
-            /* TV-related step */
-            if (methodTV == 1)  updDxDyDz_shrinkAniso3D(Output, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ), lambda);
-            else updDxDyDz_shrinkIso3D(Output, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ), lambda);
-            
-            /* update for Bregman variables */
-            updBxByBz3D(Output, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ));
-            
-            /* check early stopping criteria if epsilon not equal zero */
-            if (epsil != 0) {
-            re = 0.0f; re1 = 0.0f;
-            for(j=0; j<DimTotal; j++) {
-                re += pow(Output[j] - Output_prev[j],2);
-                re1 += pow(Output[j],2);
-				}
-            re = sqrt(re)/sqrt(re1);
-            if (re < epsil)  count++;
-				if (count > 4) break;
-			}
-            /*printf("%f %i %i \n", re, ll, count); */
-        }
-        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);
-		free(Output_prev); free(Dx); free(Dy); free(Dz); free(Bx); free(By); free(Bz);
-	}
-	return *Output;
-}
-
-/********************************************************************/
-/***************************2D Functions*****************************/
-/********************************************************************/
-float gauss_seidel2D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda, float mu)
-{
-    float sum, normConst;
-    long i,j,i1,i2,j1,j2,index;
-    normConst = 1.0f/(mu + 4.0f*lambda);
-    
-#pragma omp parallel for shared(U) private(index,i,j,i1,i2,j1,j2,sum)
-    for(i=0; i<dimX; i++) {
-        /* symmetric boundary conditions (Neuman) */
-        i1 = i+1; if (i1 == dimX) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            index = j*dimX+i;
-            
-            sum = Dx[j*dimX+i2] - Dx[index] + Dy[j2*dimX+i] - Dy[index] - Bx[j*dimX+i2] + Bx[index] - By[j2*dimX+i] + By[index];
-            sum += U_prev[j*dimX+i1] + U_prev[j*dimX+i2] + U_prev[j1*dimX+i] + U_prev[j2*dimX+i];
-            sum *= lambda;
-            sum += mu*A[index];
-            U[index] = normConst*sum;
-        }}
-    return *U;
-}
-
-float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda)
-{
-    long i,j,i1,j1,index;
-    float val1, val11, val2, val22, denom_lam;
-    denom_lam = 1.0f/lambda;
-#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,val1,val11,val2,val22)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            index = j*dimX+i;
-            
-            val1 = (U[j*dimX+i1] - U[index]) + Bx[index];
-            val2 = (U[j1*dimX+i] - U[index]) + By[index];
-            
-            val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
-            val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
-            
-            if (val1 !=0) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0;
-            if (val2 !=0) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0;
-            
-        }}
-    return 1;
-}
-float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda)
-{
-    long i,j,i1,j1,index;
-    float val1, val11, val2, denom, denom_lam;
-    denom_lam = 1.0f/lambda;
-    
-#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,val1,val11,val2,denom)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            index = j*dimX+i;
-            
-            val1 = (U[j*dimX+i1] - U[index]) + Bx[index];
-            val2 = (U[j1*dimX+i] - U[index]) + By[index];
-            
-            denom = sqrt(val1*val1 + val2*val2);
-            
-            val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
-            
-            if (denom != 0.0f) {
-                Dx[index] = val11*(val1/denom);
-                Dy[index] = val11*(val2/denom);
-            }
-            else {
-                Dx[index] = 0;
-                Dy[index] = 0;
-            }
-        }}
-    return 1;
-}
-float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY)
-{
-    long i,j,i1,j1,index;
-#pragma omp parallel for shared(U) private(index,i,j,i1,j1)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == dimX) i1 = i-1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            index = j*dimX+i;
-            
-            Bx[index] += (U[j*dimX+i1] - U[index]) - Dx[index];
-            By[index] += (U[j1*dimX+i] - U[index]) - Dy[index];
-        }}
-    return 1;
-}
-
-/********************************************************************/
-/***************************3D Functions*****************************/
-/********************************************************************/
-/*****************************************************************/
-float gauss_seidel3D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda, float mu)
-{
-    float normConst, d_val, b_val, sum;
-    long i,j,i1,i2,j1,j2,k,k1,k2,index;
-    normConst = 1.0f/(mu + 6.0f*lambda);
-#pragma omp parallel for shared(U) private(index,i,j,i1,i2,j1,j2,k,k1,k2,d_val,b_val,sum)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                i2 = i-1; if (i2 < 0) i2 = i+1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                j2 = j-1; if (j2 < 0) j2 = j+1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                k2 = k-1; if (k2 < 0) k2 = k+1;
-                index = (dimX*dimY)*k + j*dimX+i;
-                
-                d_val = Dx[(dimX*dimY)*k + j*dimX+i2] - Dx[index] + Dy[(dimX*dimY)*k + j2*dimX+i] - Dy[index] + Dz[(dimX*dimY)*k2 + j*dimX+i] - Dz[index];
-                b_val = -Bx[(dimX*dimY)*k + j*dimX+i2] + Bx[index] - By[(dimX*dimY)*k + j2*dimX+i] + By[index] - Bz[(dimX*dimY)*k2 + j*dimX+i] + Bz[index];
-                sum = d_val + b_val;
-                sum += U_prev[(dimX*dimY)*k + j*dimX+i1] + U_prev[(dimX*dimY)*k + j*dimX+i2] + U_prev[(dimX*dimY)*k + j1*dimX+i] + U_prev[(dimX*dimY)*k + j2*dimX+i] + U_prev[(dimX*dimY)*k1 + j*dimX+i] + U_prev[(dimX*dimY)*k2 + j*dimX+i];
-                sum *= lambda;
-                sum += mu*A[index];
-                U[index] = normConst*sum;
-            }}}
-    return *U;
-}
-
-float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda)
-{
-    long i,j,i1,j1,k,k1,index;
-    float val1, val11, val2, val22, val3, val33, denom_lam;
-    denom_lam = 1.0f/lambda;
-#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,k,k1,val1,val11,val2,val22,val3,val33)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                index = (dimX*dimY)*k + j*dimX+i;
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                
-                val1 = (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) + Bx[index];
-                val2 = (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) + By[index];
-                val3 = (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) + Bz[index];
-                
-                val11 = fabs(val1) - denom_lam; if (val11 < 0.0f) val11 = 0.0f;
-                val22 = fabs(val2) - denom_lam; if (val22 < 0.0f) val22 = 0.0f;
-                val33 = fabs(val3) - denom_lam; if (val33 < 0.0f) val33 = 0.0f;
-                
-                if (val1 !=0.0f) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0.0f;
-                if (val2 !=0.0f) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0.0f;
-                if (val3 !=0.0f) Dz[index] = (val3/fabs(val3))*val33; else Dz[index] = 0.0f;
-                
-            }}}
-    return 1;
-}
-float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda)
-{
-    long i,j,i1,j1,k,k1,index;
-    float val1, val11, val2, val3, denom, denom_lam;
-    denom_lam = 1.0f/lambda;
-#pragma omp parallel for shared(U,denom_lam) private(index,denom,i,j,i1,j1,k,k1,val1,val11,val2,val3)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-                index = (dimX*dimY)*k + j*dimX+i;
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                
-                val1 = (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) + Bx[index];
-                val2 = (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) + By[index];
-                val3 = (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) + Bz[index];
-                
-                denom = sqrt(val1*val1 + val2*val2 + val3*val3);
-                
-                val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
-                
-                if (denom != 0.0f) {
-                    Dx[index] = val11*(val1/denom);
-                    Dy[index] = val11*(val2/denom);
-                    Dz[index] = val11*(val3/denom);
-                }
-                else {
-                    Dx[index] = 0;
-                    Dy[index] = 0;
-                    Dz[index] = 0;
-                }
-            }}}
-    return 1;
-}
-float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ)
-{
-    long i,j,k,i1,j1,k1,index;
-#pragma omp parallel for shared(U) private(index,i,j,k,i1,j1,k1)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            for(k=0; k<dimZ; k++) {
-				index = (dimX*dimY)*k + j*dimX+i;
-                /* symmetric boundary conditions (Neuman) */
-                i1 = i+1; if (i1 == dimX) i1 = i-1;
-                j1 = j+1; if (j1 == dimY) j1 = j-1;
-                k1 = k+1; if (k1 == dimZ) k1 = k-1;
-                
-                Bx[index] += (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) - Dx[index];
-                By[index] += (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) - Dy[index];
-                Bz[index] += (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) - Dz[index];
-            }}}
-    return 1;
-}
diff --git a/Core/regularisers_CPU/SB_TV_core.h b/Core/regularisers_CPU/SB_TV_core.h
deleted file mode 100644
index 7485e3b..0000000
--- a/Core/regularisers_CPU/SB_TV_core.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-#include "CCPiDefines.h"
-
-
-/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
-*
-* Input Parameters:
-* 1. Noisy image/volume
-* 2. lambda - regularisation parameter
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon - tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-* 6. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
-*
-* Output:
-* 1. Filtered/regularized image
-*
-* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
-*/
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-CCPI_EXPORT float SB_TV_CPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
-
-CCPI_EXPORT float gauss_seidel2D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda, float mu);
-CCPI_EXPORT float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda);
-CCPI_EXPORT float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda);
-CCPI_EXPORT float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY);
-
-CCPI_EXPORT float gauss_seidel3D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda, float mu);
-CCPI_EXPORT float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda);
-CCPI_EXPORT float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda);
-CCPI_EXPORT float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Core/regularisers_CPU/TGV_core.c b/Core/regularisers_CPU/TGV_core.c
deleted file mode 100644
index 805c3d4..0000000
--- a/Core/regularisers_CPU/TGV_core.c
+++ /dev/null
@@ -1,487 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "TGV_core.h"
-
-/* C-OMP implementation of Primal-Dual denoising method for 
- * Total Generilized Variation (TGV)-L2 model [1] (2D/3D case)
- *
- * Input Parameters:
- * 1. Noisy image/volume (2D/3D)
- * 2. lambda - regularisation parameter
- * 3. parameter to control the first-order term (alpha1)
- * 4. parameter to control the second-order term (alpha0)
- * 5. Number of Chambolle-Pock (Primal-Dual) iterations
- * 6. Lipshitz constant (default is 12)
- * 
- * Output:
- * Filtered/regularised image/volume
- *
- * References:
- * [1] K. Bredies "Total Generalized Variation"
- * 
- */
- 
-float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY, int dimZ)
-{
-	long DimTotal;
-	int ll;
-	float *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma;
-
-	DimTotal = (long)(dimX*dimY*dimZ);
-	copyIm(U0, U, (long)(dimX), (long)(dimY), (long)(dimZ)); /* initialize */
-        tau = pow(L2,-0.5);
-        sigma = pow(L2,-0.5);
-
-        /* dual variables */
-        P1 = calloc(DimTotal, sizeof(float));
-        P2 = calloc(DimTotal, sizeof(float));
-        
-        Q1 = calloc(DimTotal, sizeof(float));
-        Q2 = calloc(DimTotal, sizeof(float));
-        Q3 = calloc(DimTotal, sizeof(float));
-        
-        U_old = calloc(DimTotal, sizeof(float));
-        
-        V1 = calloc(DimTotal, sizeof(float));
-        V1_old = calloc(DimTotal, sizeof(float));
-        V2 = calloc(DimTotal, sizeof(float));
-        V2_old = calloc(DimTotal, sizeof(float));
-	
-	if (dimZ == 1) {
-	/*2D case*/
-	
-        /* Primal-dual iterations begin here */
-        for(ll = 0; ll < iter; ll++) {
-            
-            /* Calculate Dual Variable P */
-            DualP_2D(U, V1, V2, P1, P2, (long)(dimX), (long)(dimY), sigma);
-            
-            /*Projection onto convex set for P*/
-            ProjP_2D(P1, P2, (long)(dimX), (long)(dimY), alpha1);
-            
-            /* Calculate Dual Variable Q */
-            DualQ_2D(V1, V2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), sigma);
-            
-            /*Projection onto convex set for Q*/
-            ProjQ_2D(Q1, Q2, Q3, (long)(dimX), (long)(dimY), alpha0);
-            
-            /*saving U into U_old*/
-            copyIm(U, U_old, (long)(dimX), (long)(dimY), 1l);
-            
-            /*adjoint operation  -> divergence and projection of P*/
-            DivProjP_2D(U, U0, P1, P2, (long)(dimX), (long)(dimY), lambda, tau);
-            
-            /*get updated solution U*/
-            newU(U, U_old, (long)(dimX), (long)(dimY));
-            
-            /*saving V into V_old*/
-            copyIm(V1, V1_old, (long)(dimX), (long)(dimY), 1l);
-            copyIm(V2, V2_old, (long)(dimX), (long)(dimY), 1l);
-            
-            /* upd V*/
-            UpdV_2D(V1, V2, P1, P2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), tau);
-            
-            /*get new V*/
-            newU(V1, V1_old, (long)(dimX), (long)(dimY));
-            newU(V2, V2_old, (long)(dimX), (long)(dimY));
-        } /*end of iterations*/
-        	}
-        else {
-        /*3D case*/
-        float *P3, *Q4, *Q5, *Q6, *V3, *V3_old;
-        
-        P3 = calloc(DimTotal, sizeof(float));
-        Q4 = calloc(DimTotal, sizeof(float));
-        Q5 = calloc(DimTotal, sizeof(float));
-        Q6 = calloc(DimTotal, sizeof(float));
-        V3 = calloc(DimTotal, sizeof(float));
-        V3_old = calloc(DimTotal, sizeof(float));
-        
-         /* Primal-dual iterations begin here */
-        for(ll = 0; ll < iter; ll++) {
-            
-            /* Calculate Dual Variable P */
-            DualP_3D(U, V1, V2, V3, P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), sigma);
-            
-            /*Projection onto convex set for P*/
-            ProjP_3D(P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), alpha1);
-            
-            /* Calculate Dual Variable Q */
-            DualQ_3D(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), sigma);
-            
-            /*Projection onto convex set for Q*/
-            ProjQ_3D(Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), alpha0);
-            
-            /*saving U into U_old*/
-            copyIm(U, U_old, (long)(dimX), (long)(dimY), (long)(dimZ));
-            
-            /*adjoint operation  -> divergence and projection of P*/
-            DivProjP_3D(U, U0, P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), lambda, tau);
-            
-            /*get updated solution U*/
-            newU3D(U, U_old, (long)(dimX), (long)(dimY), (long)(dimZ));
-            
-            /*saving V into V_old*/
-            copyIm_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));
-            
-            /* upd V*/
-            UpdV_3D(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), tau);
-            
-            /*get new V*/
-            newU3D_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));           
-	        } /*end of iterations*/
-        free(P3);free(Q4);free(Q5);free(Q6);free(V3);free(V3_old);
-        }     
-
-    /*freeing*/
-    free(P1);free(P2);free(Q1);free(Q2);free(Q3);free(U_old);
-    free(V1);free(V2);free(V1_old);free(V2_old);
-	return *U;
-}
-
-/********************************************************************/
-/***************************2D Functions*****************************/
-/********************************************************************/
-
-/*Calculating dual variable P (using forward differences)*/
-float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, long dimX, long dimY, float sigma)
-{
-    long i,j, index;
-#pragma omp parallel for shared(U,V1,V2,P1,P2) private(i,j,index)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-			 index = j*dimX+i;
-            /* symmetric boundary conditions (Neuman) */
-            if (i == dimX-1) P1[index] += sigma*((U[j*dimX+(i-1)] - U[index]) - V1[index]); 
-            else P1[index] += sigma*((U[j*dimX+(i+1)] - U[index])  - V1[index]); 
-            if (j == dimY-1) P2[index] += sigma*((U[(j-1)*dimX+i] - U[index])  - V2[index]);
-            else  P2[index] += sigma*((U[(j+1)*dimX+i] - U[index])  - V2[index]);
-        }}
-    return 1;
-}
-/*Projection onto convex set for P*/
-float ProjP_2D(float *P1, float *P2, long dimX, long dimY, float alpha1)
-{
-    float grad_magn;
-    long i,j,index;
-#pragma omp parallel for shared(P1,P2) private(i,j,index,grad_magn)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-	    index = j*dimX+i;
-            grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2)))/alpha1;
-            if (grad_magn > 1.0f) {
-                P1[index] /= grad_magn;
-                P2[index] /= grad_magn;
-            }
-        }}
-    return 1;
-}
-/*Calculating dual variable Q (using forward differences)*/
-float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float sigma)
-{
-    long i,j,index;
-    float q1, q2, q11, q22;
-#pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(i,j,index,q1,q2,q11,q22)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-    	    index = j*dimX+i;
-    	    q1 = 0.0f; q11 = 0.0f; q2 = 0.0f; q22 = 0.0f;
-            /* boundary conditions (Neuman) */
-            if (i != dimX-1){
-                q1 = V1[j*dimX+(i+1)] - V1[index];
-                q11 = V2[j*dimX+(i+1)] - V2[index];
-            }
-            if (j != dimY-1) {
-                q2 = V2[(j+1)*dimX+i] - V2[index];
-                q22 = V1[(j+1)*dimX+i] - V1[index];
-            }
-            Q1[index] += sigma*(q1);
-            Q2[index] += sigma*(q2);
-            Q3[index] += sigma*(0.5f*(q11 + q22));
-        }}
-    return 1;
-}
-float ProjQ_2D(float *Q1, float *Q2, float *Q3, long dimX, long dimY, float alpha0)
-{
-    float grad_magn;
-    long i,j,index;
-#pragma omp parallel for shared(Q1,Q2,Q3) private(i,j,index,grad_magn)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-	   index = j*dimX+i;
-            grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2));
-            grad_magn = grad_magn/alpha0;
-            if (grad_magn > 1.0f) {
-                Q1[index] /= grad_magn;
-                Q2[index] /= grad_magn;
-                Q3[index] /= grad_magn;
-            }
-        }}
-    return 1;
-}
-/* Divergence and projection for P*/
-float DivProjP_2D(float *U, float *U0, float *P1, float *P2, long dimX, long dimY, float lambda, float tau)
-{
-    long i,j,index;
-    float P_v1, P_v2, div;
-#pragma omp parallel for shared(U,U0,P1,P2) private(i,j,index,P_v1,P_v2,div)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-	    index = j*dimX+i;
-            if (i == 0) P_v1 = P1[index];
-            else P_v1 = P1[index] - P1[j*dimX+(i-1)];
-            if (j == 0) P_v2 = P2[index];
-            else  P_v2 = P2[index] - P2[(j-1)*dimX+i];
-            div = P_v1 + P_v2;
-            U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
-        }}
-    return *U;
-}
-/*get updated solution U*/
-float newU(float *U, float *U_old, long dimX, long dimY)
-{
-    long i;
-#pragma omp parallel for shared(U,U_old) private(i)
-    for(i=0; i<dimX*dimY; i++) U[i] = 2*U[i] - U_old[i];
-    return *U;
-}
-/*get update for V*/
-float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float tau)
-{
-    long i, j, index;
-    float q1, q3_x, q3_y, q2, div1, div2;
-#pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(i, j, index, q1, q3_x, q3_y, q2, div1, div2)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-	    index = j*dimX+i;
-              q2 = 0.0f;  q3_y = 0.0f; q1 = 0.0f; q3_x = 0.0;
-            /* boundary conditions (Neuman) */
-            if (i != 0) {
-                q1 = Q1[index] - Q1[j*dimX+(i-1)];
-                q3_x = Q3[index] - Q3[j*dimX+(i-1)];
-            }
-            if (j != 0) {
-                q2 = Q2[index] - Q2[(j-1)*dimX+i];
-                q3_y = Q3[index] - Q3[(j-1)*dimX+i];
-            }
-            div1 = q1 + q3_y;
-            div2 = q3_x + q2;
-            V1[index] += tau*(P1[index] + div1);
-            V2[index] += tau*(P2[index] + div2);
-        }}
-    return 1;
-}
-
-/********************************************************************/
-/***************************3D Functions*****************************/
-/********************************************************************/
-/*Calculating dual variable P (using forward differences)*/
-float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float sigma)
-{
-    long i,j,k, index;
-#pragma omp parallel for shared(U,V1,V2,V3,P1,P2,P3) private(i,j,k,index)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-          for(k=0; k<dimZ; k++) {             	   
-    	   index = (dimX*dimY)*k + j*dimX+i;    	   
-            /* symmetric boundary conditions (Neuman) */
-            if (i == dimX-1) P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i-1)] - U[index]) - V1[index]); 
-            else P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i+1)] - U[index])  - V1[index]); 
-            if (j == dimY-1) P2[index] += sigma*((U[(dimX*dimY)*k + (j-1)*dimX+i] - U[index])  - V2[index]);
-            else  P2[index] += sigma*((U[(dimX*dimY)*k + (j+1)*dimX+i] - U[index])  - V2[index]);
-            if (k == dimZ-1) P3[index] += sigma*((U[(dimX*dimY)*(k-1) + j*dimX+i] - U[index])  - V3[index]);
-            else  P3[index] += sigma*((U[(dimX*dimY)*(k+1) + j*dimX+i] - U[index])  - V3[index]);
-        }}}
-    return 1;
-}
-/*Projection onto convex set for P*/
-float ProjP_3D(float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float alpha1)
-{
-    float grad_magn;
-    long i,j,k,index;
-#pragma omp parallel for shared(P1,P2,P3) private(i,j,k,index,grad_magn)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-	  for(k=0; k<dimZ; k++) {   	
-   	    index = (dimX*dimY)*k + j*dimX+i;
-            grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1;
-            if (grad_magn > 1.0f) {
-                P1[index] /= grad_magn;
-                P2[index] /= grad_magn;
-                P3[index] /= grad_magn;
-            }
-        }}}
-    return 1;
-}
-/*Calculating dual variable Q (using forward differences)*/
-float DualQ_3D(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float sigma)
-{
-    long i,j,k,index;
-    float q1, q2, q3, q11, q22, q33, q44, q55, q66;
-#pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6,V1,V2,V3) private(i,j,k,index,q1,q2,q3,q11,q22,q33,q44,q55,q66)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-       	  for(k=0; k<dimZ; k++) {   	
-	    index = (dimX*dimY)*k + j*dimX+i;
-	    q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f;
-            /* symmetric boundary conditions (Neuman) */
-            if (i != dimX-1){ 
-                q1 = V1[(dimX*dimY)*k + j*dimX+(i+1)] - V1[index];              
-                q11 = V2[(dimX*dimY)*k + j*dimX+(i+1)] - V2[index];
-                q33 = V3[(dimX*dimY)*k + j*dimX+(i+1)] - V3[index];
-            }
-            if (j != dimY-1) {
-                q2 = V2[(dimX*dimY)*k + (j+1)*dimX+i] - V2[index];                
-                q22 = V1[(dimX*dimY)*k + (j+1)*dimX+i] - V1[index];
-                q55 = V3[(dimX*dimY)*k + (j+1)*dimX+i] - V3[index];
-            }
-            if (k != dimZ-1) {
-                q3 = V3[(dimX*dimY)*(k+1) + j*dimX+i] - V3[index];
-                q44 = V1[(dimX*dimY)*(k+1) + j*dimX+i] - V1[index];
-                q66 = V2[(dimX*dimY)*(k+1) + j*dimX+i] - V2[index];
-            }
-            
-            Q1[index] += sigma*(q1); /*Q11*/
-            Q2[index] += sigma*(q2); /*Q22*/            
-            Q3[index] += sigma*(q3); /*Q33*/
-            Q4[index] += sigma*(0.5f*(q11 + q22)); /* Q21 / Q12 */
-            Q5[index] += sigma*(0.5f*(q33 + q44)); /* Q31 / Q13 */
-            Q6[index] += sigma*(0.5f*(q55 + q66)); /* Q32 / Q23 */
-        }}}
-    return 1;
-}
-float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float alpha0)
-{
-    float grad_magn;
-    long i,j,k,index;
-#pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6) private(i,j,k,index,grad_magn)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-       	  for(k=0; k<dimZ; k++) {   	
-	    index = (dimX*dimY)*k + j*dimX+i;           
-            grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2));
-            grad_magn = grad_magn/alpha0;
-            if (grad_magn > 1.0f) {
-                Q1[index] /= grad_magn;
-                Q2[index] /= grad_magn;
-                Q3[index] /= grad_magn;
-                Q4[index] /= grad_magn;
-                Q5[index] /= grad_magn;
-                Q6[index] /= grad_magn;
-            }
-        }}}
-    return 1;
-}
-/* Divergence and projection for P*/
-float DivProjP_3D(float *U, float *U0, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float lambda, float tau)
-{
-    long i,j,k,index;
-    float P_v1, P_v2, P_v3, div;
-#pragma omp parallel for shared(U,U0,P1,P2,P3) private(i,j,k,index,P_v1,P_v2,P_v3,div)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-       	  for(k=0; k<dimZ; k++) {   	
-	    index = (dimX*dimY)*k + j*dimX+i; 	    
-            if (i == 0) P_v1 = P1[index];
-            else P_v1 = P1[index] - P1[(dimX*dimY)*k + j*dimX+(i-1)];
-            if (j == 0) P_v2 = P2[index];
-            else P_v2 = P2[index] - P2[(dimX*dimY)*k + (j-1)*dimX+i];
-            if (k == 0) P_v3 = P3[index];
-            else P_v3 = P3[index] - P3[(dimX*dimY)*(k-1) + (j)*dimX+i];              
-                      
-            div = P_v1 + P_v2 + P_v3;
-            U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau); 
-        }}}
-    return *U;
-}
-/*get update for V*/
-float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float tau)
-{
-    long i,j,k,index;
-    float q1, q4x, q5x, q2, q4y, q6y, q6z, q5z, q3, div1, div2, div3;
-#pragma omp parallel for shared(V1,V2,V3,P1,P2,P3,Q1,Q2,Q3,Q4,Q5,Q6) private(i,j,k,index,q1,q4x,q5x,q2,q4y,q6y,q6z,q5z,q3,div1,div2,div3)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-       	  for(k=0; k<dimZ; k++) {   	
-	    index = (dimX*dimY)*k + j*dimX+i; 	
-	    q1 = 0.0f; q4x= 0.0f; q5x= 0.0f; q2= 0.0f; q4y= 0.0f; q6y= 0.0f; q6z= 0.0f; q5z= 0.0f; q3= 0.0f;
-            /* Q1 - Q11, Q2 - Q22, Q3 -  Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/            
-            /* symmetric boundary conditions (Neuman) */
-            if (i != 0) {
-                q1 = Q1[index] - Q1[(dimX*dimY)*k + j*dimX+(i-1)];
-                q4x = Q4[index] - Q4[(dimX*dimY)*k + j*dimX+(i-1)];                
-                q5x = Q5[index] - Q5[(dimX*dimY)*k + j*dimX+(i-1)];
-            }
-            if (j != 0) {
-                q2 = Q2[index] - Q2[(dimX*dimY)*k + (j-1)*dimX+i];
-                q4y = Q4[index] - Q4[(dimX*dimY)*k + (j-1)*dimX+i];
-                q6y = Q6[index] - Q6[(dimX*dimY)*k + (j-1)*dimX+i];
-            }
-             if (k != 0) {
-                q6z = Q6[index] - Q6[(dimX*dimY)*(k-1) + (j)*dimX+i];
-                q5z = Q5[index] - Q5[(dimX*dimY)*(k-1) + (j)*dimX+i];
-                q3 = Q3[index] - Q3[(dimX*dimY)*(k-1) + (j)*dimX+i];
-            }
-            div1 = q1 + q4y + q5z;
-            div2 = q4x + q2 + q6z;            
-            div3 = q5x + q6y + q3;
-            
-            V1[index] += tau*(P1[index] + div1);
-            V2[index] += tau*(P2[index] + div2);
-            V3[index] += tau*(P3[index] + div3);
-        }}}
-    return 1;
-}
-
-float copyIm_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ)
-{
-	long j;
-#pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(j)
-	for (j = 0; j<dimX*dimY*dimZ; j++)  {	
-	V1_old[j] = V1[j];
-	V2_old[j] = V2[j];
-	V3_old[j] = V3[j];	
-	}
-	return 1;
-}
-
-/*get updated solution U*/
-float newU3D(float *U, float *U_old, long dimX, long dimY, long dimZ)
-{
-    long i;
-#pragma omp parallel for shared(U, U_old) private(i)
-    for(i=0; i<dimX*dimY*dimZ; i++) U[i] = 2.0f*U[i] - U_old[i];
-    return *U;
-}
-
-
-/*get updated solution U*/
-float newU3D_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ)
-{
-    long i;
-#pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(i)
-    for(i=0; i<dimX*dimY*dimZ; i++) {
-    V1[i] = 2.0f*V1[i] - V1_old[i];
-    V2[i] = 2.0f*V2[i] - V2_old[i];
-    V3[i] = 2.0f*V3[i] - V3_old[i];
-    }
-    return 1;
-}
-
diff --git a/Core/regularisers_CPU/TGV_core.h b/Core/regularisers_CPU/TGV_core.h
deleted file mode 100644
index 11b12c1..0000000
--- a/Core/regularisers_CPU/TGV_core.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-#include "CCPiDefines.h"
-
-/* C-OMP implementation of Primal-Dual denoising method for 
- * Total Generilized Variation (TGV)-L2 model [1] (2D/3D)
- *
- * Input Parameters:
- * 1. Noisy image/volume (2D/3D)
- * 2. lambda - regularisation parameter
- * 3. parameter to control the first-order term (alpha1)
- * 4. parameter to control the second-order term (alpha0)
- * 5. Number of Chambolle-Pock (Primal-Dual) iterations
- * 6. Lipshitz constant (default is 12)
- * 
- * Output:
- * Filtered/regularised image/volume
- *
- * References:
- * [1] K. Bredies "Total Generalized Variation"
- */
- 
- 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-CCPI_EXPORT float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY, int dimZ);
-
-/* 2D functions */
-CCPI_EXPORT float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, long dimX, long dimY, float sigma);
-CCPI_EXPORT float ProjP_2D(float *P1, float *P2, long dimX, long dimY, float alpha1);
-CCPI_EXPORT float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float sigma);
-CCPI_EXPORT float ProjQ_2D(float *Q1, float *Q2, float *Q3, long dimX, long dimY, float alpha0);
-CCPI_EXPORT float DivProjP_2D(float *U, float *U0, float *P1, float *P2, long dimX, long dimY, float lambda, float tau);
-CCPI_EXPORT float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float tau);
-CCPI_EXPORT float newU(float *U, float *U_old, long dimX, long dimY);
-/* 3D functions */
-CCPI_EXPORT float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float sigma);
-CCPI_EXPORT float ProjP_3D(float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float alpha1);
-CCPI_EXPORT float DualQ_3D(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float sigma);
-CCPI_EXPORT float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float alpha0);
-CCPI_EXPORT float DivProjP_3D(float *U, float *U0, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float lambda, float tau);
-CCPI_EXPORT float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float tau);
-CCPI_EXPORT float newU3D(float *U, float *U_old, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float copyIm_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float newU3D_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Core/regularisers_CPU/TNV_core.c b/Core/regularisers_CPU/TNV_core.c
deleted file mode 100755
index 753cc5f..0000000
--- a/Core/regularisers_CPU/TNV_core.c
+++ /dev/null
@@ -1,452 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TNV_core.h"
-
-/*
- * C-OMP implementation of Total Nuclear Variation regularisation model (2D + channels) [1]
- * The code is modified from the implementation by Joan Duran <joan.duran@uib.es> see
- * "denoisingPDHG_ipol.cpp" in Joans Collaborative Total Variation package
- *
- * Input Parameters:
- * 1. Noisy volume of 2D + channel dimension, i.e. 3D volume
- * 2. lambda - regularisation parameter
- * 3. Number of iterations [OPTIONAL parameter]
- * 4. eplsilon - tolerance constant [OPTIONAL parameter]
- * 5. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
- *
- * Output:
- * 1. Filtered/regularized image
- *
- * [1]. Duran, J., Moeller, M., Sbert, C. and Cremers, D., 2016. Collaborative total variation: a general framework for vectorial TV models. SIAM Journal on Imaging Sciences, 9(1), pp.116-151.
- */
-
-float TNV_CPU_main(float *Input, float *u, float lambda, int maxIter, float tol, int dimX, int dimY, int dimZ)
-{
-    long k, p, q, r, DimTotal;
-    float taulambda;
-    float *u_upd, *gx, *gy, *gx_upd, *gy_upd, *qx, *qy, *qx_upd, *qy_upd, *v, *vx, *vy, *gradx, *grady, *gradx_upd, *grady_upd, *gradx_ubar, *grady_ubar, *div, *div_upd;
-    
-    p = 1l;
-    q = 1l;
-    r = 0l;
-    
-    lambda = 1.0f/(2.0f*lambda);
-    DimTotal = (long)(dimX*dimY*dimZ);
-    /* PDHG algorithm parameters*/
-    float tau = 0.5f;
-    float sigma = 0.5f;
-    float theta = 1.0f;
-    
-    // Auxiliar vectors
-    u_upd = calloc(DimTotal, sizeof(float));
-    gx = calloc(DimTotal, sizeof(float));
-    gy = calloc(DimTotal, sizeof(float));
-    gx_upd = calloc(DimTotal, sizeof(float));
-    gy_upd = calloc(DimTotal, sizeof(float));
-    qx = calloc(DimTotal, sizeof(float));
-    qy = calloc(DimTotal, sizeof(float));
-    qx_upd = calloc(DimTotal, sizeof(float));
-    qy_upd = calloc(DimTotal, sizeof(float));
-    v = calloc(DimTotal, sizeof(float));
-    vx = calloc(DimTotal, sizeof(float));
-    vy = calloc(DimTotal, sizeof(float));
-    gradx = calloc(DimTotal, sizeof(float));
-    grady = calloc(DimTotal, sizeof(float));
-    gradx_upd = calloc(DimTotal, sizeof(float));
-    grady_upd = calloc(DimTotal, sizeof(float));
-    gradx_ubar = calloc(DimTotal, sizeof(float));
-    grady_ubar = calloc(DimTotal, sizeof(float));
-    div = calloc(DimTotal, sizeof(float));
-    div_upd = calloc(DimTotal, sizeof(float));
-    
-    // Backtracking parameters
-    float s = 1.0f;
-    float gamma = 0.75f;
-    float beta = 0.95f;
-    float alpha0 = 0.2f;
-    float alpha = alpha0;
-    float delta = 1.5f;
-    float eta = 0.95f;
-    
-    // PDHG algorithm parameters
-    taulambda = tau * lambda;
-    float divtau = 1.0f / tau;
-    float divsigma = 1.0f / sigma;
-    float theta1 = 1.0f + theta;
-    
-    /*allocate memory for  taulambda */
-    //taulambda = (float*) calloc(dimZ, sizeof(float));
-    //for(k=0; k < dimZ; k++)  {taulambda[k] = tau*lambda[k];}
-    
-    // Apply Primal-Dual Hybrid Gradient scheme
-    int iter = 0;
-    float residual = fLarge;
-    float ubarx, ubary;
-    
-    for(iter = 0; iter < maxIter; iter++)   {
-        // Argument of proximal mapping of fidelity term
-#pragma omp parallel for shared(v, u) private(k)
-        for(k=0; k<dimX*dimY*dimZ; k++)  {v[k] = u[k] + tau*div[k];}
-
-// Proximal solution of fidelity term
-proxG(u_upd, v, Input, taulambda, (long)(dimX), (long)(dimY), (long)(dimZ));
-
-// Gradient of updated primal variable
-gradient(u_upd, gradx_upd, grady_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
-
-// Argument of proximal mapping of regularization term
-#pragma omp parallel for shared(gradx_upd, grady_upd, gradx, grady) private(k, ubarx, ubary)
-for(k=0; k<dimX*dimY*dimZ; k++) {
-    ubarx = theta1 * gradx_upd[k] - theta * gradx[k];
-    ubary = theta1 * grady_upd[k] - theta * grady[k];
-    vx[k] = ubarx + divsigma * qx[k];
-    vy[k] = ubary + divsigma * qy[k];
-    gradx_ubar[k] = ubarx;
-    grady_ubar[k] = ubary;
-}
-
-proxF(gx_upd, gy_upd, vx, vy, sigma, p, q, r, (long)(dimX), (long)(dimY), (long)(dimZ));
-
-// Update dual variable
-#pragma omp parallel for shared(qx_upd, qy_upd) private(k)
-for(k=0; k<dimX*dimY*dimZ; k++) {
-    qx_upd[k] = qx[k] + sigma * (gradx_ubar[k] - gx_upd[k]);
-    qy_upd[k] = qy[k] + sigma * (grady_ubar[k] - gy_upd[k]);
-}
-
-// Divergence of updated dual variable
-#pragma omp parallel for shared(div_upd) private(k)
-for(k=0; k<dimX*dimY*dimZ; k++)  {div_upd[k] = 0.0f;}
-divergence(qx_upd, qy_upd, div_upd, dimX, dimY, dimZ);
-
-// Compute primal residual, dual residual, and backtracking condition
-float resprimal = 0.0f;
-float resdual = 0.0f;
-float product = 0.0f;
-float unorm = 0.0f;
-float qnorm = 0.0f;
-
-for(k=0; k<dimX*dimY*dimZ; k++) {
-    float udiff = u[k] - u_upd[k];
-    float qxdiff = qx[k] - qx_upd[k];
-    float qydiff = qy[k] - qy_upd[k];
-    float divdiff = div[k] - div_upd[k];
-    float gradxdiff = gradx[k] - gradx_upd[k];
-    float gradydiff = grady[k] - grady_upd[k];
-    
-    resprimal += fabs(divtau*udiff + divdiff);
-    resdual += fabs(divsigma*qxdiff - gradxdiff);
-    resdual += fabs(divsigma*qydiff - gradydiff);
-    
-    unorm += (udiff * udiff);
-    qnorm += (qxdiff * qxdiff + qydiff * qydiff);
-    product += (gradxdiff * qxdiff + gradydiff * qydiff);
-}
-
-float b = (2.0f * tau * sigma * product) / (gamma * sigma * unorm +
-        gamma * tau * qnorm);
-
-// Adapt step-size parameters
-float dual_dot_delta = resdual * s * delta;
-float dual_div_delta = (resdual * s) / delta;
-
-if(b > 1)
-{
-    // Decrease step-sizes to fit balancing principle
-    tau = (beta * tau) / b;
-    sigma = (beta * sigma) / b;
-    alpha = alpha0;
-    
-    copyIm(u, u_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
-    copyIm(gx, gx_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
-    copyIm(gy, gy_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
-    copyIm(qx, qx_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
-    copyIm(qy, qy_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
-    copyIm(gradx, gradx_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
-    copyIm(grady, grady_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
-    copyIm(div, div_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
-    
-} else if(resprimal > dual_dot_delta)
-{
-    // Increase primal step-size and decrease dual step-size
-    tau = tau / (1.0f - alpha);
-    sigma = sigma * (1.0f - alpha);
-    alpha = alpha * eta;
-    
-} else if(resprimal < dual_div_delta)
-{
-    // Decrease primal step-size and increase dual step-size
-    tau = tau * (1.0f - alpha);
-    sigma = sigma / (1.0f - alpha);
-    alpha = alpha * eta;
-}
-
-// Update variables
-taulambda = tau * lambda;
-//for(k=0; k < dimZ; k++) taulambda[k] = tau*lambda[k];
-
-divsigma = 1.0f / sigma;
-divtau = 1.0f / tau;
-
-copyIm(u_upd, u, (long)(dimX), (long)(dimY), (long)(dimZ));
-copyIm(gx_upd, gx, (long)(dimX), (long)(dimY), (long)(dimZ));
-copyIm(gy_upd, gy, (long)(dimX), (long)(dimY), (long)(dimZ));
-copyIm(qx_upd, qx, (long)(dimX), (long)(dimY), (long)(dimZ));
-copyIm(qy_upd, qy, (long)(dimX), (long)(dimY), (long)(dimZ));
-copyIm(gradx_upd, gradx, (long)(dimX), (long)(dimY), (long)(dimZ));
-copyIm(grady_upd, grady, (long)(dimX), (long)(dimY), (long)(dimZ));
-copyIm(div_upd, div, (long)(dimX), (long)(dimY), (long)(dimZ));
-
-// Compute residual at current iteration
-residual = (resprimal + resdual) / ((float) (dimX*dimY*dimZ));
-
-//       printf("%f \n", residual);
-if (residual < tol) {
-    printf("Iterations stopped at %i with the residual %f \n", iter, residual);
-    break; }
-
-    }
-    printf("Iterations stopped at %i with the residual %f \n", iter, residual);
-    free (u_upd); free(gx); free(gy); free(gx_upd); free(gy_upd);
-    free(qx); free(qy); free(qx_upd); free(qy_upd); free(v); free(vx); free(vy);
-    free(gradx); free(grady); free(gradx_upd); free(grady_upd); free(gradx_ubar);
-    free(grady_ubar); free(div); free(div_upd);    
-    return *u;
-}
-
-float proxG(float *u_upd, float *v, float *f, float taulambda, long dimX, long dimY, long dimZ)
-{
-    float constant;
-    long k;
-    constant = 1.0f + taulambda;
-#pragma omp parallel for shared(v, f, u_upd) private(k)
-    for(k=0; k<dimZ*dimX*dimY; k++) {
-        u_upd[k] = (v[k] + taulambda * f[k])/constant;
-        //u_upd[(dimX*dimY)*k + l] = (v[(dimX*dimY)*k + l] + taulambda * f[(dimX*dimY)*k + l])/constant;
-    }
-    return *u_upd;
-}
-
-float gradient(float *u_upd, float *gradx_upd, float *grady_upd, long dimX, long dimY, long dimZ)
-{
-    long i, j, k, l;
-    // Compute discrete gradient using forward differences
-#pragma omp parallel for shared(gradx_upd,grady_upd,u_upd) private(i, j, k, l)
-    for(k = 0; k < dimZ; k++)   {
-        for(j = 0; j < dimY; j++)   {
-            l = j * dimX;           
-            for(i = 0; i < dimX; i++)   {
-                // Derivatives in the x-direction
-                if(i != dimX-1)
-                    gradx_upd[(dimX*dimY)*k + i+l] = u_upd[(dimX*dimY)*k + i+1+l] - u_upd[(dimX*dimY)*k + i+l];
-                else
-                    gradx_upd[(dimX*dimY)*k + i+l] = 0.0f;
-                
-                // Derivatives in the y-direction
-                if(j != dimY-1)
-                    //grady_upd[(dimX*dimY)*k + i+l] = u_upd[(dimX*dimY)*k + i+dimY+l] -u_upd[(dimX*dimY)*k + i+l];
-                    grady_upd[(dimX*dimY)*k + i+l] = u_upd[(dimX*dimY)*k + i+(j+1)*dimX] -u_upd[(dimX*dimY)*k + i+l];
-                else
-                    grady_upd[(dimX*dimY)*k + i+l] = 0.0f;
-            }}}
-    return 1;
-}
-
-float proxF(float *gx, float *gy, float *vx, float *vy, float sigma, int p, int q, int r, long dimX, long dimY, long dimZ)
-{
-    // (S^p, \ell^1) norm decouples at each pixel
-//   Spl1(gx, gy, vx, vy, sigma, p, num_channels, dim);
-    float divsigma = 1.0f / sigma;
-    
-    // $\ell^{1,1,1}$-TV regularization
-//       int i,j,k;
-//     #pragma omp parallel for shared (gx,gy,vx,vy) private(i,j,k)
-//      for(k = 0; k < dimZ; k++)  {
-//         for(i=0; i<dimX; i++) {
-//              for(j=0; j<dimY; j++) {
-//                 gx[(dimX*dimY)*k + (i)*dimY + (j)] = SIGN(vx[(dimX*dimY)*k + (i)*dimY + (j)]) * MAX(fabs(vx[(dimX*dimY)*k + (i)*dimY + (j)]) - divsigma,  0.0f);
-//                 gy[(dimX*dimY)*k + (i)*dimY + (j)] = SIGN(vy[(dimX*dimY)*k + (i)*dimY + (j)]) * MAX(fabs(vy[(dimX*dimY)*k + (i)*dimY + (j)]) - divsigma,  0.0f);
-//             }}}
-    
-    // Auxiliar vector
-    float *proj, sum, shrinkfactor ;
-    float M1,M2,M3,valuex,valuey,T,D,det,eig1,eig2,sig1,sig2,V1, V2, V3, V4, v0,v1,v2, mu1,mu2,sig1_upd,sig2_upd,t1,t2,t3;
-    long i,j,k, ii, num;
-#pragma omp parallel for shared (gx,gy,vx,vy,p) private(i,ii,j,k,proj,num, sum, shrinkfactor, M1,M2,M3,valuex,valuey,T,D,det,eig1,eig2,sig1,sig2,V1, V2, V3, V4,v0,v1,v2,mu1,mu2,sig1_upd,sig2_upd,t1,t2,t3)
-    for(i=0; i<dimX; i++) {
-        for(j=0; j<dimY; j++) {
-            
-            proj = (float*) calloc (2,sizeof(float));
-            // Compute matrix $M\in\R^{2\times 2}$
-            M1 = 0.0f;
-            M2 = 0.0f;
-            M3 = 0.0f;
-            
-            for(k = 0; k < dimZ; k++)
-            {
-                valuex = vx[(dimX*dimY)*k + (j)*dimX + (i)];
-                valuey = vy[(dimX*dimY)*k + (j)*dimX + (i)];
-                
-                M1 += (valuex * valuex);
-                M2 += (valuex * valuey);
-                M3 += (valuey * valuey);
-            }
-            
-            // Compute eigenvalues of M
-            T = M1 + M3;
-            D = M1 * M3 - M2 * M2;
-            det = sqrt(MAX((T * T / 4.0f) - D, 0.0f));
-            eig1 = MAX((T / 2.0f) + det, 0.0f);
-            eig2 = MAX((T / 2.0f) - det, 0.0f);
-            sig1 = sqrt(eig1);
-            sig2 = sqrt(eig2);
-            
-            // Compute normalized eigenvectors
-            V1 = V2 = V3 = V4 = 0.0f;
-            
-            if(M2 != 0.0f)
-            {
-                v0 = M2;
-                v1 = eig1 - M3;
-                v2 = eig2 - M3;
-                
-                mu1 = sqrtf(v0 * v0 + v1 * v1);
-                mu2 = sqrtf(v0 * v0 + v2 * v2);
-                
-                if(mu1 > fTiny)
-                {
-                    V1 = v1 / mu1;
-                    V3 = v0 / mu1;
-                }
-                
-                if(mu2 > fTiny)
-                {
-                    V2 = v2 / mu2;
-                    V4 = v0 / mu2;
-                }
-                
-            } else
-            {
-                if(M1 > M3)
-                {
-                    V1 = V4 = 1.0f;
-                    V2 = V3 = 0.0f;
-                    
-                } else
-                {
-                    V1 = V4 = 0.0f;
-                    V2 = V3 = 1.0f;
-                }
-            }
-            
-            // Compute prox_p of the diagonal entries
-            sig1_upd = sig2_upd = 0.0f;
-            
-            if(p == 1)
-            {
-                sig1_upd = MAX(sig1 - divsigma, 0.0f);
-                sig2_upd = MAX(sig2 - divsigma, 0.0f);
-                
-            } else if(p == INFNORM)
-            {
-                proj[0] = sigma * fabs(sig1);
-                proj[1] = sigma * fabs(sig2);
-                
-                /*l1 projection part */
-                sum = fLarge;
-                num = 0l;
-                shrinkfactor = 0.0f;
-                while(sum > 1.0f)
-                {
-                    sum = 0.0f;
-                    num = 0;
-                    
-                    for(ii = 0; ii < 2; ii++)
-                    {
-                        proj[ii] = MAX(proj[ii] - shrinkfactor, 0.0f);
-                        
-                        sum += fabs(proj[ii]);
-                        if(proj[ii]!= 0.0f)
-                            num++;
-                    }
-                    
-                    if(num > 0)
-                        shrinkfactor = (sum - 1.0f) / num;
-                    else
-                        break;
-                }
-                /*l1 proj ends*/
-                
-                sig1_upd = sig1 - divsigma * proj[0];
-                sig2_upd = sig2 - divsigma * proj[1];
-            }
-            
-            // Compute the diagonal entries of $\widehat{\Sigma}\Sigma^{\dagger}_0$
-            if(sig1 > fTiny)
-                sig1_upd /= sig1;
-            
-            if(sig2 > fTiny)
-                sig2_upd /= sig2;
-            
-            // Compute solution
-            t1 = sig1_upd * V1 * V1 + sig2_upd * V2 * V2;
-            t2 = sig1_upd * V1 * V3 + sig2_upd * V2 * V4;
-            t3 = sig1_upd * V3 * V3 + sig2_upd * V4 * V4;
-            
-            for(k = 0; k < dimZ; k++)
-            {
-                gx[(dimX*dimY)*k + j*dimX + i] = vx[(dimX*dimY)*k + j*dimX + i] * t1 + vy[(dimX*dimY)*k + j*dimX + i] * t2;
-                gy[(dimX*dimY)*k + j*dimX + i] = vx[(dimX*dimY)*k + j*dimX + i] * t2 + vy[(dimX*dimY)*k + j*dimX + i] * t3;
-            }           
-            
-            // Delete allocated memory
-            free(proj);
-        }}
-    
-    return 1;
-}
-
-float divergence(float *qx_upd, float *qy_upd, float *div_upd, long dimX, long dimY, long dimZ)
-{
-    long i, j, k, l;
-#pragma omp parallel for shared(qx_upd,qy_upd,div_upd) private(i, j, k, l)
-    for(k = 0; k < dimZ; k++)   {
-        for(j = 0; j < dimY; j++)   {
-            l = j * dimX;            
-            for(i = 0; i < dimX; i++)   {
-                if(i != dimX-1)
-                {
-                    // ux[k][i+l] = u[k][i+1+l] - u[k][i+l]
-                    div_upd[(dimX*dimY)*k + i+1+l] -= qx_upd[(dimX*dimY)*k + i+l];
-                    div_upd[(dimX*dimY)*k + i+l] += qx_upd[(dimX*dimY)*k + i+l];
-                }
-                
-                if(j != dimY-1)
-                {
-                    // uy[k][i+l] = u[k][i+width+l] - u[k][i+l]
-                    //div_upd[(dimX*dimY)*k + i+dimY+l] -= qy_upd[(dimX*dimY)*k + i+l];
-                    div_upd[(dimX*dimY)*k + i+(j+1)*dimX] -= qy_upd[(dimX*dimY)*k + i+l];                    
-                    div_upd[(dimX*dimY)*k + i+l] += qy_upd[(dimX*dimY)*k + i+l];
-                }
-            }
-        }
-    }
-    return *div_upd;
-}
diff --git a/Core/regularisers_CPU/TNV_core.h b/Core/regularisers_CPU/TNV_core.h
deleted file mode 100644
index aa050a4..0000000
--- a/Core/regularisers_CPU/TNV_core.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#include <math.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <stdio.h>
-#include "omp.h"
-#include "utils.h"
-#include "CCPiDefines.h"
-
-#define fTiny 0.00000001f
-#define fLarge 100000000.0f
-#define INFNORM -1
-
-#define MAX(i,j) ((i)<(j) ? (j):(i))
-#define MIN(i,j) ((i)<(j) ? (i):(j))
-
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-CCPI_EXPORT float TNV_CPU_main(float *Input, float *u, float lambda, int maxIter, float tol, int dimX, int dimY, int dimZ);
-
-/*float PDHG(float *A, float *B, float tau, float sigma, float theta, float lambda, int p, int q, int r, float tol, int maxIter, int d_c, int d_w, int d_h);*/
-CCPI_EXPORT float proxG(float *u_upd, float *v, float *f, float taulambda, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float gradient(float *u_upd, float *gradx_upd, float *grady_upd, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float proxF(float *gx, float *gy, float *vx, float *vy, float sigma, int p, int q, int r, long dimX, long dimY, long dimZ);
-CCPI_EXPORT float divergence(float *qx_upd, float *qy_upd, float *div_upd, long dimX, long dimY, long dimZ);
-#ifdef __cplusplus
-}
-#endif
\ No newline at end of file
diff --git a/Core/regularisers_CPU/utils.c b/Core/regularisers_CPU/utils.c
deleted file mode 100644
index 7a4e80b..0000000
--- a/Core/regularisers_CPU/utils.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazanteev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "utils.h"
-#include <math.h>
-
-/* Copy Image (float) */
-float copyIm(float *A, float *U, long dimX, long dimY, long dimZ)
-{
-	long j;
-#pragma omp parallel for shared(A, U) private(j)
-	for (j = 0; j<dimX*dimY*dimZ; j++)  U[j] = A[j];
-	return *U;
-}
-
-/* Copy Image */
-unsigned char copyIm_unchar(unsigned char *A, unsigned char *U, int dimX, int dimY, int dimZ)
-{
-	int j;
-#pragma omp parallel for shared(A, U) private(j)
-	for (j = 0; j<dimX*dimY*dimZ; j++)  U[j] = A[j];
-	return *U;
-}
-
-/*Roll image symmetrically from top to bottom*/
-float copyIm_roll(float *A, float *U, int dimX, int dimY, int roll_value, int switcher)
-{
-    int i, j;
-#pragma omp parallel for shared(U, A) private(i,j)
-    for (i=0; i<dimX; i++) {
-        for (j=0; j<dimY; j++) {
-            if (switcher == 0) {
-                if (j < (dimY - roll_value)) U[j*dimX + i] = A[(j+roll_value)*dimX + i];
-                else U[j*dimX + i] = A[(j - (dimY - roll_value))*dimX + i];
-            }
-            else {
-                if (j < roll_value) U[j*dimX + i] = A[(j+(dimY - roll_value))*dimX + i];
-                else U[j*dimX + i] = A[(j - roll_value)*dimX + i];
-            }
-        }}
-    return *U;
-}
-
-/* function that calculates TV energy
- * type - 1:  2*lambda*min||\nabla u|| + ||u -u0||^2
- * type - 2:  2*lambda*min||\nabla u|| 
- * */
-float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int type, int dimX, int dimY)
-{
-	int i, j, i1, j1, index;
-	float NOMx_2, NOMy_2, E_Grad=0.0f, E_Data=0.0f;
-	
-	/* first calculate \grad U_xy*/	
-        for(j=0; j<dimY; j++) {
-            for(i=0; i<dimX; i++) {
-				index = j*dimX+i;
-                /* boundary conditions */
-                i1 = i + 1; if (i == dimX-1) i1 = i;
-                j1 = j + 1; if (j == dimY-1) j1 = j;
-                
-                /* Forward differences */                
-                NOMx_2 = powf((float)(U[j1*dimX + i] - U[index]),2); /* x+ */
-                NOMy_2 = powf((float)(U[j*dimX + i1] - U[index]),2); /* y+ */
-                E_Grad += 2.0f*lambda*sqrtf((float)(NOMx_2) + (float)(NOMy_2)); /* gradient term energy */
-                E_Data += powf((float)(U[index]-U0[index]),2); /* fidelity term energy */
-			}
-		}
-		if (type == 1) E_val[0] = E_Grad + E_Data;
-		if (type == 2) E_val[0] = E_Grad;
-		return *E_val;
-}
-
-float TV_energy3D(float *U, float *U0, float *E_val, float lambda, int type, int dimX, int dimY, int dimZ)
-{
-	long i, j, k, i1, j1, k1, index;
-	float NOMx_2, NOMy_2, NOMz_2, E_Grad=0.0f, E_Data=0.0f;
-	
-	/* first calculate \grad U_xy*/	
-    for(j=0; j<(long)(dimY); j++) {
-        for(i=0; i<(long)(dimX); i++) {
-            for(k=0; k<(long)(dimZ); k++) {
-				index = (dimX*dimY)*k + j*dimX+i;
-                /* boundary conditions */
-                i1 = i + 1; if (i == (long)(dimX-1)) i1 = i;
-                j1 = j + 1; if (j == (long)(dimY-1)) j1 = j;
-                k1 = k + 1; if (k == (long)(dimZ-1)) k1 = k;
-                
-                /* Forward differences */                
-                NOMx_2 = powf((float)(U[(dimX*dimY)*k + j1*dimX+i] - U[index]),2); /* x+ */
-                NOMy_2 = powf((float)(U[(dimX*dimY)*k + j*dimX+i1] - U[index]),2); /* y+ */
-                NOMz_2 = powf((float)(U[(dimX*dimY)*k1 + j*dimX+i] - U[index]),2); /* z+ */
-                
-                E_Grad += 2.0f*lambda*sqrtf((float)(NOMx_2) + (float)(NOMy_2) + (float)(NOMz_2)); /* gradient term energy */
-                E_Data += (powf((float)(U[index]-U0[index]),2)); /* fidelity term energy */
-			}
-		}
-	}
-		if (type == 1) E_val[0] = E_Grad + E_Data;
-		if (type == 2) E_val[0] = E_Grad;
-		return *E_val;
-}
diff --git a/Core/regularisers_CPU/utils.h b/Core/regularisers_CPU/utils.h
deleted file mode 100644
index cfaf6d7..0000000
--- a/Core/regularisers_CPU/utils.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include <stdlib.h>
-#include <memory.h>
-#include "CCPiDefines.h"
-#include "omp.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-CCPI_EXPORT float copyIm(float *A, float *U, long dimX, long dimY, long dimZ);
-CCPI_EXPORT unsigned char copyIm_unchar(unsigned char *A, unsigned char *U, int dimX, int dimY, int dimZ);
-CCPI_EXPORT float copyIm_roll(float *A, float *U, int dimX, int dimY, int roll_value, int switcher);
-CCPI_EXPORT float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int type, int dimX, int dimY);
-CCPI_EXPORT float TV_energy3D(float *U, float *U0, float *E_val, float lambda, int type, int dimX, int dimY, int dimZ);
-#ifdef __cplusplus
-}
-#endif
diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu b/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
deleted file mode 100644
index a4dbe70..0000000
--- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
+++ /dev/null
@@ -1,268 +0,0 @@
- /*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/ 
-
-#include "Diffus_4thO_GPU_core.h"
-#include "shared.h"
-
-/* CUDA implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
- * The minimisation is performed using explicit scheme. 
- *
- * Input Parameters:
- * 1. Noisy image/volume 
- * 2. lambda - regularization parameter
- * 3. Edge-preserving parameter (sigma)
- * 4. Number of iterations, for explicit scheme >= 150 is recommended 
- * 5. tau - time-marching step for explicit scheme
- *
- * Output:
- * [1] Regularized image/volume 
- *
- * This function is based on the paper by
- * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
- */
-
-#define BLKXSIZE 8
-#define BLKYSIZE 8
-#define BLKZSIZE 8
-    
-#define BLKXSIZE2D 16
-#define BLKYSIZE2D 16
-#define EPS 1.0e-7
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-/********************************************************************/
-/***************************2D Functions*****************************/
-/********************************************************************/
-__global__ void Weighted_Laplc2D_kernel(float *W_Lapl, float *U0, float sigma, int dimX, int dimY)
-{
-		int i1,i2,j1,j2;
-		float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq;
-    
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        
-        int index = i + dimX*j;
-        
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
-            
-            /* boundary conditions (Neumann reflections) */
-			i1 = i+1; if (i1 == dimX) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-
-				gradX = 0.5f*(U0[j*dimX+i2] - U0[j*dimX+i1]);
-				gradX_sq = powf(gradX,2);
-				
-				gradY = 0.5f*(U0[j2*dimX+i] - U0[j1*dimX+i]);
-                gradY_sq = powf(gradY,2);
-                
-                gradXX = U0[j*dimX+i2] + U0[j*dimX+i1] - 2*U0[index];
-                gradYY = U0[j2*dimX+i] + U0[j1*dimX+i] - 2*U0[index];
-                
-                gradXY = 0.25f*(U0[j2*dimX+i2] + U0[j1*dimX+i1] - U0[j1*dimX+i2] - U0[j2*dimX+i1]);
-                xy_2 = 2.0f*gradX*gradY*gradXY;
-                
-                denom =  gradX_sq + gradY_sq;
-                
-                if (denom <= EPS) {
-                    V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/EPS;
-                    V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/EPS; 
-                    }
-                else  {
-                    V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/denom;
-                    V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/denom;  
-                    }
-
-                c = 1.0f/(1.0f + denom/sigma);
-                c_sq = c*c;
-                
-                W_Lapl[index] = c_sq*V_norm + c*V_orth;
-		}
-	return;
-} 
-
-__global__ void Diffusion_update_step2D_kernel(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, int dimX, int dimY)
-{
-	int i1,i2,j1,j2;
-    float gradXXc, gradYYc;
-
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        
-        int index = i + dimX*j;
-        
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
-            
-            /* boundary conditions (Neumann reflections) */
-			i1 = i+1; if (i1 == dimX) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-					
-                    gradXXc = W_Lapl[j*dimX+i2] + W_Lapl[j*dimX+i1] - 2*W_Lapl[index];
-                    gradYYc = W_Lapl[j2*dimX+i] + W_Lapl[j1*dimX+i] - 2*W_Lapl[index];
-
-                    Output[index] += tau*(-lambdaPar*(gradXXc + gradYYc) - (Output[index] - Input[index]));
-		}
-	return;
-} 
-/********************************************************************/
-/***************************3D Functions*****************************/
-/********************************************************************/
-__global__ void Weighted_Laplc3D_kernel(float *W_Lapl, float *U0, float sigma, int dimX, int dimY, int dimZ)
-{
-		int i1,i2,j1,j2,k1,k2;
-		float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2;
-		
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-		int j = blockDim.y * blockIdx.y + threadIdx.y;
-		int k = blockDim.z * blockIdx.z + threadIdx.z;
-		
-		if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-		    
-		    /* boundary conditions (Neumann reflections) */
-			i1 = i+1; if (i1 == dimX) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-			k1 = k+1; if (k1 == dimZ) k1 = k-1;
-			k2 = k-1; if (k2 < 0) k2 = k+1;
-		
-				int index = (dimX*dimY)*k + j*dimX+i;
-				
-				gradX = 0.5f*(U0[(dimX*dimY)*k + j*dimX+i2] - U0[(dimX*dimY)*k + j*dimX+i1]);
-				gradX_sq = pow(gradX,2);
-				
-				gradY = 0.5f*(U0[(dimX*dimY)*k + j2*dimX+i] - U0[(dimX*dimY)*k + j1*dimX+i]);
-                gradY_sq = pow(gradY,2);
-                
-                gradZ = 0.5f*(U0[(dimX*dimY)*k2 + j*dimX+i] - U0[(dimX*dimY)*k1 + j*dimX+i]);
-                gradZ_sq = pow(gradZ,2);
-                
-                gradXX = U0[(dimX*dimY)*k + j*dimX+i2] + U0[(dimX*dimY)*k + j*dimX+i1] - 2*U0[index];
-                gradYY = U0[(dimX*dimY)*k + j2*dimX+i] + U0[(dimX*dimY)*k + j1*dimX+i] - 2*U0[index];
-                gradZZ = U0[(dimX*dimY)*k2 + j*dimX+i] + U0[(dimX*dimY)*k1 + j*dimX+i] - 2*U0[index];
-                                
-                gradXY = 0.25f*(U0[(dimX*dimY)*k + j2*dimX+i2] + U0[(dimX*dimY)*k + j1*dimX+i1] - U0[(dimX*dimY)*k + j1*dimX+i2] - U0[(dimX*dimY)*k + j2*dimX+i1]);
-                gradXZ = 0.25f*(U0[(dimX*dimY)*k2 + j*dimX+i2] - U0[(dimX*dimY)*k2+j*dimX+i1] - U0[(dimX*dimY)*k1+j*dimX+i2] + U0[(dimX*dimY)*k1+j*dimX+i1]);
-                gradYZ = 0.25f*(U0[(dimX*dimY)*k2 +j2*dimX+i] - U0[(dimX*dimY)*k2+j1*dimX+i] - U0[(dimX*dimY)*k1+j2*dimX+i] + U0[(dimX*dimY)*k1+j1*dimX+i]);
-                
-                xy_2  = 2.0f*gradX*gradY*gradXY;
-                xyz_1 = 2.0f*gradX*gradZ*gradXZ;
-                xyz_2 = 2.0f*gradY*gradZ*gradYZ;
-                
-                denom =  gradX_sq + gradY_sq + gradZ_sq;
-                
-					if (denom <= EPS) {
-					V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/EPS;
-                    V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/EPS;
-					}
-					else  {
-					V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/denom;
-                    V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/denom;
-					}
-
-                c = 1.0f/(1.0f + denom/sigma);
-                c_sq = c*c;
-                
-            W_Lapl[index] = c_sq*V_norm + c*V_orth;
-		}
-	return;
-}
-__global__ void Diffusion_update_step3D_kernel(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, int dimX, int dimY, int dimZ)
-{
-	int i1,i2,j1,j2,k1,k2;
-    float gradXXc, gradYYc, gradZZc;
-
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-		int j = blockDim.y * blockIdx.y + threadIdx.y;
-		int k = blockDim.z * blockIdx.z + threadIdx.z;
-		
-		if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-		    
-		    /* boundary conditions (Neumann reflections) */
-			i1 = i+1; if (i1 == dimX) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            j1 = j+1; if (j1 == dimY) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-			k1 = k+1; if (k1 == dimZ) k1 = k-1;
-			k2 = k-1; if (k2 < 0) k2 = k+1;
-			
-			int index = (dimX*dimY)*k + j*dimX+i;
-			
-                    gradXXc = W_Lapl[(dimX*dimY)*k + j*dimX+i2] + W_Lapl[(dimX*dimY)*k + j*dimX+i1] - 2*W_Lapl[index];
-                    gradYYc = W_Lapl[(dimX*dimY)*k + j2*dimX+i] + W_Lapl[(dimX*dimY)*k + j1*dimX+i] - 2*W_Lapl[index];
-                    gradZZc = W_Lapl[(dimX*dimY)*k2 + j*dimX+i] + W_Lapl[(dimX*dimY)*k1 + j*dimX+i] - 2*W_Lapl[index];
-                    
-                    Output[index] += tau*(-lambdaPar*(gradXXc + gradYYc + gradZZc) - (Output[index] - Input[index]));
-		}
-	return;
-}
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-/********************* MAIN HOST FUNCTION ******************/
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z)
-{
-		int dimTotal, dev = 0;
-		CHECK(cudaSetDevice(dev));
-        float *d_input, *d_output, *d_W_Lapl;
-        float sigmaPar2;
-        sigmaPar2 = sigmaPar*sigmaPar;
-        dimTotal = N*M*Z;
-        
-        CHECK(cudaMalloc((void**)&d_input,dimTotal*sizeof(float)));
-        CHECK(cudaMalloc((void**)&d_output,dimTotal*sizeof(float)));
-        CHECK(cudaMalloc((void**)&d_W_Lapl,dimTotal*sizeof(float)));
-                
-        CHECK(cudaMemcpy(d_input,Input,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
-        CHECK(cudaMemcpy(d_output,Input,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
-        
-	if (Z == 1) {
-	     /*2D case */
-        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
-        dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
-             
-        for(int n=0; n < iterationsNumb; n++) {
-				/* Calculating weighted Laplacian */
-				Weighted_Laplc2D_kernel<<<dimGrid,dimBlock>>>(d_W_Lapl, d_output, sigmaPar2, N, M);
-				CHECK(cudaDeviceSynchronize());
-				/* Perform iteration step */
-				Diffusion_update_step2D_kernel<<<dimGrid,dimBlock>>>(d_output, d_input, d_W_Lapl, lambdaPar, sigmaPar2, tau, N, M);
-				CHECK(cudaDeviceSynchronize());
-        }
-	}
-	else {
-		/*3D case*/
-        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
-        dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKZSIZE));
-			for(int n=0; n < iterationsNumb; n++) {
-				/* Calculating weighted Laplacian */
-				Weighted_Laplc3D_kernel<<<dimGrid,dimBlock>>>(d_W_Lapl, d_output, sigmaPar2, N, M, Z);
-				CHECK(cudaDeviceSynchronize());
-				/* Perform iteration step */
-				Diffusion_update_step3D_kernel<<<dimGrid,dimBlock>>>(d_output, d_input, d_W_Lapl, lambdaPar, sigmaPar2, tau, N, M, Z);
-				CHECK(cudaDeviceSynchronize());
-			}
-		}
-        CHECK(cudaMemcpy(Output,d_output,dimTotal*sizeof(float),cudaMemcpyDeviceToHost));
-        CHECK(cudaFree(d_input));
-        CHECK(cudaFree(d_output));
-        CHECK(cudaFree(d_W_Lapl));
-        return 0;
-}
diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h b/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
deleted file mode 100644
index 77d5d79..0000000
--- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __Diff_4thO_GPU_H__
-#define __Diff_4thO_GPU_H__
-#include "CCPiDefines.h"
-#include <stdio.h>
-
-extern "C" CCPI_EXPORT int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
-
-#endif 
diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
deleted file mode 100644
index 87871be..0000000
--- a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
+++ /dev/null
@@ -1,473 +0,0 @@
- /*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/ 
-
-#include "LLT_ROF_GPU_core.h"
-#include "shared.h"
-
-/* CUDA implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
- * 
-* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well. 
-* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase 
-* lambdaLLT starting with smaller values. 
-*
-* Input Parameters:
-* 1. U0 - original noise image/volume
-* 2. lambdaROF - ROF-related regularisation parameter
-* 3. lambdaLLT - LLT-related regularisation parameter
-* 4. tau - time-marching step 
-* 5. iter - iterations number (for both models)
-*
-* Output:
-* Filtered/regularised image
-*
-* References: 
-* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
-* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
-*/
-
-#define BLKXSIZE 8
-#define BLKYSIZE 8
-#define BLKZSIZE 8
-    
-#define BLKXSIZE2D 16
-#define BLKYSIZE2D 16
-
-
-#define EPS_LLT 0.01
-#define EPS_ROF 1.0e-12
-
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-
-__host__ __device__ int signLLT (float x)
-{
-        return (x > 0) - (x < 0);
-}        
-   
-/*************************************************************************/
-/**********************LLT-related functions *****************************/
-/*************************************************************************/
-__global__ void der2D_LLT_kernel(float *U, float *D1, float *D2, int dimX, int dimY)
-    {
-		int i_p, i_m, j_m, j_p;
-		float dxx, dyy, denom_xx, denom_yy;
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        
-        int index = i + dimX*j;
-        
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
-            
-			/* symmetric boundary conditions (Neuman) */
-			i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-			i_m = i - 1; if (i_m < 0) i_m = i + 1;
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-
-			dxx = U[j*dimX+i_p] - 2.0f*U[index] + U[j*dimX+i_m];
-			dyy = U[j_p*dimX+i] - 2.0f*U[index] + U[j_m*dimX+i];
-
-			denom_xx = abs(dxx) + EPS_LLT;
-			denom_yy = abs(dyy) + EPS_LLT;
-
-			D1[index] = dxx / denom_xx;
-			D2[index] = dyy / denom_yy;
-		}
-	}
-	
-__global__ void der3D_LLT_kernel(float* U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ)
-    {
-		int i_p, i_m, j_m, j_p, k_p, k_m;
-		float dxx, dyy, dzz, denom_xx, denom_yy, denom_zz;
-		
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        int k = blockDim.z * blockIdx.z + threadIdx.z;
-        
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-			
-        /* symmetric boundary conditions (Neuman) */
- 		i_p = i + 1; if (i_p == dimX) i_p = i - 1;
- 		i_m = i - 1; if (i_m < 0) i_m = i + 1;
- 		j_p = j + 1; if (j_p == dimY) j_p = j - 1;
- 		j_m = j - 1; if (j_m < 0) j_m = j + 1;
- 		k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
- 		k_m = k - 1; if (k_m < 0) k_m = k + 1;
-        
-      	int index = (dimX*dimY)*k + j*dimX+i;
-      	
-      	dxx = U[(dimX*dimY)*k + j*dimX+i_p] - 2.0f*U[index] + U[(dimX*dimY)*k + j*dimX+i_m];
- 		dyy = U[(dimX*dimY)*k + j_p*dimX+i] - 2.0f*U[index] + U[(dimX*dimY)*k + j_m*dimX+i];
- 		dzz = U[(dimX*dimY)*k_p + j*dimX+i] - 2.0f*U[index] + U[(dimX*dimY)*k_m + j*dimX+i];
- 
- 		denom_xx = abs(dxx) + EPS_LLT;
- 		denom_yy = abs(dyy) + EPS_LLT;
- 		denom_zz = abs(dzz) + EPS_LLT;
- 
- 		D1[index] = dxx / denom_xx;
- 		D2[index] = dyy / denom_yy;
- 		D3[index] = dzz / denom_zz;
-		}
-	}
-
-/*************************************************************************/
-/**********************ROF-related functions *****************************/
-/*************************************************************************/
-
-/* first-order differences 1 */
-__global__ void D1_func2D_ROF_kernel(float* Input, float* D1, int N, int M)
-    {
-		int i1, j1, i2;
-		float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1;
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        
-        int index = i + N*j;        
-        
-        if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) {
-            
-            /* boundary conditions (Neumann reflections) */
-                i1 = i + 1; if (i1 >= N) i1 = i-1;
-                i2 = i - 1; if (i2 < 0) i2 = i+1;
-                j1 = j + 1; if (j1 >= M) j1 = j-1;
-		
-		     /* Forward-backward differences */
-                NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */
-                NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */                
-                NOMy_0 = Input[index] - Input[j*N + i2]; /* y- */
-                
-                denom1 = NOMx_1*NOMx_1;
-                denom2 = 0.5f*(signLLT((float)NOMy_1) + signLLT((float)NOMy_0))*(MIN(abs((float)NOMy_1),abs((float)NOMy_0)));
-                denom2 = denom2*denom2;
-                T1 = sqrt(denom1 + denom2 + EPS_ROF);
-                D1[index] = NOMx_1/T1;
-		}		
-	}
-	
-/* differences 2 */
-__global__ void D2_func2D_ROF_kernel(float* Input, float* D2, int N, int M)      
-    {
-		int i1, j1, j2;
-		float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2;
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        
-        int index = i + N*j;        
-        
-        if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) {
-            
-            /* boundary conditions (Neumann reflections) */
-                i1 = i + 1; if (i1 >= N) i1 = i-1;
-                j1 = j + 1; if (j1 >= M) j1 = j-1;
-                j2 = j - 1; if (j2 < 0) j2 = j+1; 
-		
-                /* Forward-backward differences */
-                NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */
-                NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */
-                NOMx_0 = Input[index] - Input[j2*N + i]; /* x- */
-                
-                denom1 = NOMy_1*NOMy_1;
-                denom2 = 0.5f*(signLLT((float)NOMx_1) + signLLT((float)NOMx_0))*(MIN(abs((float)NOMx_1),abs((float)NOMx_0)));
-                denom2 = denom2*denom2;
-                T2 = sqrt(denom1 + denom2 + EPS_ROF);
-                D2[index] = NOMy_1/T2;	
-		}		
-	}
-
- 
-    /* differences 1 */
-__global__ void D1_func3D_ROF_kernel(float* Input, float* D1, int dimX, int dimY, int dimZ)      
-    {
-		float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
-		int i1,i2,k1,j1,j2,k2;
-		
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        int k = blockDim.z * blockIdx.z + threadIdx.z;
-        
-      	int index = (dimX*dimY)*k + j*dimX+i;
-        
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-            
-                    /* symmetric boundary conditions (Neuman) */
-                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                    i2 = i - 1; if (i2 < 0) i2 = i+1;
-                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                    j2 = j - 1; if (j2 < 0) j2 = j+1;
-                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                    k2 = k - 1; if (k2 < 0) k2 = k+1;
-                    
-                    /* Forward-backward differences */
-                    NOMx_1 = Input[(dimX*dimY)*k + j1*dimX + i] - Input[index]; /* x+ */
-                    NOMy_1 = Input[(dimX*dimY)*k + j*dimX + i1] - Input[index]; /* y+ */                    
-                    NOMy_0 = Input[index] - Input[(dimX*dimY)*k + j*dimX + i2]; /* y- */
-                    
-                    NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
-                    NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
-                    
-                    
-                    denom1 = NOMx_1*NOMx_1;
-                    denom2 = 0.5*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0)));
-                    denom2 = denom2*denom2;
-                    denom3 = 0.5*(signLLT(NOMz_1) + signLLT(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0)));
-                    denom3 = denom3*denom3;
-                    T1 = sqrt(denom1 + denom2 + denom3 + EPS_ROF);
-                    D1[index] = NOMx_1/T1;	
-		}		
-	}      
-
-    /* differences 2 */
-    __global__ void D2_func3D_ROF_kernel(float* Input, float* D2, int dimX, int dimY, int dimZ)      
-    {
-		float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
-		int i1,i2,k1,j1,j2,k2;
-		
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        int k = blockDim.z * blockIdx.z + threadIdx.z;
-        
-      	int index = (dimX*dimY)*k + j*dimX+i;     
-        
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-                    /* symmetric boundary conditions (Neuman) */
-                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                    i2 = i - 1; if (i2 < 0) i2 = i+1;
-                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                    j2 = j - 1; if (j2 < 0) j2 = j+1;
-                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                    k2 = k - 1; if (k2 < 0) k2 = k+1;
-                    
-                    
-                    /* Forward-backward differences */
-                    NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
-                    NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
-                    NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
-                    NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
-                    NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
-                    
-                    
-                    denom1 = NOMy_1*NOMy_1;
-                    denom2 = 0.5*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0)));
-                    denom2 = denom2*denom2;
-                    denom3 = 0.5*(signLLT(NOMz_1) + signLLT(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0)));
-                    denom3 = denom3*denom3;
-                    T2 = sqrt(denom1 + denom2 + denom3 + EPS_ROF);
-                    D2[index] = NOMy_1/T2;
-		}
-	}
-	
-	  /* differences 3 */
-    __global__ void D3_func3D_ROF_kernel(float* Input, float* D3, int dimX, int dimY, int dimZ)      
-    {
-		float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
-		int i1,i2,k1,j1,j2,k2;
-		
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        int k = blockDim.z * blockIdx.z + threadIdx.z;
-        
-      	int index = (dimX*dimY)*k + j*dimX+i;     
-        
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-
-				i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                i2 = i - 1; if (i2 < 0) i2 = i+1;
-                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                j2 = j - 1; if (j2 < 0) j2 = j+1;
-                k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                k2 = k - 1; if (k2 < 0) k2 = k+1;
-                
-                /* Forward-backward differences */
-                NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
-                NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
-                NOMy_0 = Input[index] - Input[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */
-                NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
-                NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
-               
-                denom1 = NOMz_1*NOMz_1;
-                denom2 = 0.5*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0)));
-                denom2 = denom2*denom2;
-                denom3 = 0.5*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0)));
-                denom3 = denom3*denom3;
-                T3 = sqrt(denom1 + denom2 + denom3 + EPS_ROF);
-                D3[index] = NOMz_1/T3;
-		}
-	}
-/*************************************************************************/
-/**********************ROF-LLT-related functions *************************/
-/*************************************************************************/
-
-__global__ void Update2D_LLT_ROF_kernel(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D1_ROF, float *D2_ROF, float lambdaROF, float lambdaLLT, float tau, int dimX, int dimY)
-{
-		
-		int i_p, i_m, j_m, j_p;
-		float div, laplc, dxx, dyy, dv1, dv2;
-	
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        
-        int index = i + dimX*j;
-        
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
-            
-			/* symmetric boundary conditions (Neuman) */
-			i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-			i_m = i - 1; if (i_m < 0) i_m = i + 1;
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-
-			index = j*dimX+i;
-					
-			/*LLT-related part*/
-			dxx = D1_LLT[j*dimX+i_p] - 2.0f*D1_LLT[index] + D1_LLT[j*dimX+i_m];
-			dyy = D2_LLT[j_p*dimX+i] - 2.0f*D2_LLT[index] + D2_LLT[j_m*dimX+i];
-			laplc = dxx + dyy; /*build Laplacian*/
-			/*ROF-related part*/
-			dv1 = D1_ROF[index] - D1_ROF[j_m*dimX + i];
-            dv2 = D2_ROF[index] - D2_ROF[j*dimX + i_m];
-			div = dv1 + dv2; /*build Divirgent*/
-            
-			/*combine all into one cost function to minimise */
-            U[index] += tau*(2.0f*lambdaROF*(div) - lambdaLLT*(laplc) - (U[index] - U0[index]));
-		}
-}
-
-__global__ void Update3D_LLT_ROF_kernel(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D3_LLT, float *D1_ROF, float *D2_ROF, float *D3_ROF, float lambdaROF, float lambdaLLT, float tau, int dimX, int dimY, int dimZ)
-{
-	int i_p, i_m, j_m, j_p, k_p, k_m;
-	float div, laplc, dxx, dyy, dzz, dv1, dv2, dv3;
-	
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        int k = blockDim.z * blockIdx.z + threadIdx.z;
-        
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-			
-			/* symmetric boundary conditions (Neuman) */
-			i_p = i + 1; if (i_p == dimX) i_p = i - 1;
-			i_m = i - 1; if (i_m < 0) i_m = i + 1;
-			j_p = j + 1; if (j_p == dimY) j_p = j - 1;
-			j_m = j - 1; if (j_m < 0) j_m = j + 1;
-			k_p = k + 1; if (k_p == dimZ) k_p = k - 1;
-			k_m = k - 1; if (k_m < 0) k_m = k + 1;
-        
-			int index = (dimX*dimY)*k + j*dimX+i;
-      	
-			/*LLT-related part*/
-			dxx = D1_LLT[(dimX*dimY)*k + j*dimX+i_p] - 2.0f*D1_LLT[index] + D1_LLT[(dimX*dimY)*k + j*dimX+i_m];
-			dyy = D2_LLT[(dimX*dimY)*k + j_p*dimX+i] - 2.0f*D2_LLT[index] + D2_LLT[(dimX*dimY)*k + j_m*dimX+i];
-			dzz = D3_LLT[(dimX*dimY)*k_p + j*dimX+i] - 2.0f*D3_LLT[index] + D3_LLT[(dimX*dimY)*k_m + j*dimX+i];
-			laplc = dxx + dyy + dzz; /*build Laplacian*/
-			
-			/*ROF-related part*/
-			dv1 = D1_ROF[index] - D1_ROF[(dimX*dimY)*k + j_m*dimX+i];
-            dv2 = D2_ROF[index] - D2_ROF[(dimX*dimY)*k + j*dimX+i_m];
-            dv3 = D3_ROF[index] - D3_ROF[(dimX*dimY)*k_m + j*dimX+i];
-			div = dv1 + dv2 + dv3; /*build Divirgent*/
-            
-			/*combine all into one cost function to minimise */
-            U[index] += tau*(2.0f*lambdaROF*(div) - lambdaLLT*(laplc) - (U[index] - U0[index]));
-        }
-}
-
-/*******************************************************************/
-/************************ HOST FUNCTION ****************************/
-/*******************************************************************/
-
-extern "C" int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z)
-{
-	    // set up device
-		int dev = 0;
-		int DimTotal;
-		DimTotal = N*M*Z;
-		CHECK(cudaSetDevice(dev));
-        float *d_input, *d_update;
-        float *D1_LLT=NULL, *D2_LLT=NULL, *D3_LLT=NULL, *D1_ROF=NULL, *D2_ROF=NULL, *D3_ROF=NULL;
-        
-	if (Z == 0) {Z = 1;}
-	
-        CHECK(cudaMalloc((void**)&d_input,DimTotal*sizeof(float)));
-        CHECK(cudaMalloc((void**)&d_update,DimTotal*sizeof(float)));
-        
-        CHECK(cudaMalloc((void**)&D1_LLT,DimTotal*sizeof(float)));
-        CHECK(cudaMalloc((void**)&D2_LLT,DimTotal*sizeof(float)));
-        CHECK(cudaMalloc((void**)&D3_LLT,DimTotal*sizeof(float)));
-        
-        CHECK(cudaMalloc((void**)&D1_ROF,DimTotal*sizeof(float)));
-        CHECK(cudaMalloc((void**)&D2_ROF,DimTotal*sizeof(float)));
-        CHECK(cudaMalloc((void**)&D3_ROF,DimTotal*sizeof(float)));
-        
-        CHECK(cudaMemcpy(d_input,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
-        CHECK(cudaMemcpy(d_update,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
-        
-    if (Z == 1) {
-			// TV - 2D case
-            dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
-            dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
-             
-            for(int n=0; n < iterationsNumb; n++) {
-                /****************ROF******************/
-				/* calculate first-order differences */
-                D1_func2D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D1_ROF, N, M);
-                CHECK(cudaDeviceSynchronize());
-				D2_func2D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D2_ROF, N, M);
-                CHECK(cudaDeviceSynchronize());                
-                /****************LLT******************/
-                 /* estimate second-order derrivatives */
-				der2D_LLT_kernel<<<dimGrid,dimBlock>>>(d_update, D1_LLT, D2_LLT, N, M);
-				/* Joint update for ROF and LLT models */
-				Update2D_LLT_ROF_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, D1_LLT, D2_LLT, D1_ROF, D2_ROF, lambdaROF, lambdaLLT, tau, N, M);
-                CHECK(cudaDeviceSynchronize());
-            }
-    }
-    else {
-			// 3D case
-            dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
-            dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE));
-           
-            for(int n=0; n < iterationsNumb; n++) {
-                /****************ROF******************/
-				/* calculate first-order differences */
-                D1_func3D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D1_ROF, N, M, Z);
-                CHECK(cudaDeviceSynchronize());
-				D2_func3D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D2_ROF, N, M, Z);
-                CHECK(cudaDeviceSynchronize());        
-                D3_func3D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D3_ROF, N, M, Z);
-                CHECK(cudaDeviceSynchronize());        
-                /****************LLT******************/
-                 /* estimate second-order derrivatives */
-				der3D_LLT_kernel<<<dimGrid,dimBlock>>>(d_update, D1_LLT, D2_LLT, D3_LLT, N, M, Z);
-				/* Joint update for ROF and LLT models */
-				Update3D_LLT_ROF_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, D1_LLT, D2_LLT, D3_LLT, D1_ROF, D2_ROF, D3_ROF, lambdaROF, lambdaLLT, tau, N, M, Z);
-                CHECK(cudaDeviceSynchronize());
-            }
-    }        
-        CHECK(cudaMemcpy(Output,d_update,DimTotal*sizeof(float),cudaMemcpyDeviceToHost));
-        CHECK(cudaFree(d_input));
-        CHECK(cudaFree(d_update));
-        CHECK(cudaFree(D1_LLT));
-        CHECK(cudaFree(D2_LLT));
-        CHECK(cudaFree(D3_LLT));
-        CHECK(cudaFree(D1_ROF));
-        CHECK(cudaFree(D2_ROF));
-        CHECK(cudaFree(D3_ROF));
-        return 0;
-}
diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.h b/Core/regularisers_GPU/LLT_ROF_GPU_core.h
deleted file mode 100644
index a6bfcc7..0000000
--- a/Core/regularisers_GPU/LLT_ROF_GPU_core.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __ROFLLTGPU_H__
-#define __ROFLLTGPU_H__
-#include "CCPiDefines.h"
-#include <stdio.h>
-
-extern "C" CCPI_EXPORT int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
-
-#endif 
diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.cu b/Core/regularisers_GPU/NonlDiff_GPU_core.cu
deleted file mode 100644
index ff7ce4d..0000000
--- a/Core/regularisers_GPU/NonlDiff_GPU_core.cu
+++ /dev/null
@@ -1,345 +0,0 @@
- /*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/ 
-
-#include "NonlDiff_GPU_core.h"
-#include "shared.h"
-
-/* CUDA implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
- * The minimisation is performed using explicit scheme. 
- *
- * Input Parameters:
- * 1. Noisy image/volume 
- * 2. lambda - regularization parameter
- * 3. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
- * 4. Number of iterations, for explicit scheme >= 150 is recommended 
- * 5. tau - time-marching step for explicit scheme
- * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
- *
- * Output:
- * [1] Regularized image/volume 
- *
- * This function is based on the paper by
- * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
- * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
- */
-
-
-#define BLKXSIZE 8
-#define BLKYSIZE 8
-#define BLKZSIZE 8
-    
-#define BLKXSIZE2D 16
-#define BLKYSIZE2D 16
-#define EPS 1.0e-5
-    
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-
-__host__ __device__ int signNDF (float x)
-{
-        return (x > 0) - (x < 0);
-}        
-   
-/********************************************************************/
-/***************************2D Functions*****************************/
-/********************************************************************/
-__global__ void LinearDiff2D_kernel(float *Input, float *Output, float lambdaPar, float tau, int N, int M)
-    {
-		int i1,i2,j1,j2;
-		float e,w,n,s,e1,w1,n1,s1;
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        
-        int index = i + N*j;
-        
-        if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) {
-            
-            /* boundary conditions (Neumann reflections) */
-			i1 = i+1; if (i1 == N) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            j1 = j+1; if (j1 == M) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            
-		        e = Output[j*N+i1];
-                w = Output[j*N+i2];
-                n = Output[j1*N+i];
-                s = Output[j2*N+i];
-                
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-                
-                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index])); 
-		}
-	} 
-    
- __global__ void NonLinearDiff2D_kernel(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, int N, int M)
-    {
-		int i1,i2,j1,j2;
-		float e,w,n,s,e1,w1,n1,s1;
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        
-        int index = i + N*j;
-        
-        if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) {
-            
-            /* boundary conditions (Neumann reflections) */
-			i1 = i+1; if (i1 == N) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            j1 = j+1; if (j1 == M) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-            
-		        e = Output[j*N+i1];
-                w = Output[j*N+i2];
-                n = Output[j1*N+i];
-                s = Output[j2*N+i];
-                
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-                
-            if (penaltytype == 1){
-            /* Huber penalty */
-            if (abs(e1) > sigmaPar) e1 =  signNDF(e1);
-            else e1 = e1/sigmaPar;
-            
-            if (abs(w1) > sigmaPar) w1 =  signNDF(w1);
-            else w1 = w1/sigmaPar;
-            
-            if (abs(n1) > sigmaPar) n1 =  signNDF(n1);
-            else n1 = n1/sigmaPar;
-            
-            if (abs(s1) > sigmaPar) s1 =  signNDF(s1);
-            else s1 = s1/sigmaPar;
-            }
-            else if (penaltytype == 2) {
-            /* Perona-Malik */
-            e1 = (e1)/(1.0f + pow((e1/sigmaPar),2));
-            w1 = (w1)/(1.0f + pow((w1/sigmaPar),2));
-            n1 = (n1)/(1.0f + pow((n1/sigmaPar),2));
-            s1 = (s1)/(1.0f + pow((s1/sigmaPar),2));
-            }
-            else if (penaltytype == 3) {
-            /* Tukey Biweight */
-            if (abs(e1) <= sigmaPar) e1 =  e1*pow((1.0f - pow((e1/sigmaPar),2)), 2);
-            else e1 = 0.0f;
-            if (abs(w1) <= sigmaPar) w1 =  w1*pow((1.0f - pow((w1/sigmaPar),2)), 2);
-            else w1 = 0.0f;
-            if (abs(n1) <= sigmaPar) n1 =  n1*pow((1.0f - pow((n1/sigmaPar),2)), 2);
-            else n1 = 0.0f;
-            if (abs(s1) <= sigmaPar) s1 =  s1*pow((1.0f - pow((s1/sigmaPar),2)), 2);
-            else s1 = 0.0f;
-            }
-            else printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
-                            
-            Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index])); 
-		}
-	} 
-/********************************************************************/
-/***************************3D Functions*****************************/
-/********************************************************************/
-
-__global__ void LinearDiff3D_kernel(float *Input, float *Output, float lambdaPar, float tau, int N, int M, int Z)
-    {
-		int i1,i2,j1,j2,k1,k2;
-		float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-		int j = blockDim.y * blockIdx.y + threadIdx.y;
-		int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-		int index = (N*M)*k + i + N*j;        
-        
-        if ((i >= 0) && (i < N) && (j >= 0) && (j < M) && (k >= 0) && (k < Z)) {
-            
-            /* boundary conditions (Neumann reflections) */
-			i1 = i+1; if (i1 == N) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            j1 = j+1; if (j1 == M) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-			k1 = k+1; if (k1 == Z) k1 = k-1;
-			k2 = k-1; if (k2 < 0) k2 = k+1;
-            
-		        e = Output[(N*M)*k + i1 + N*j];
-                w = Output[(N*M)*k + i2 + N*j];
-                n = Output[(N*M)*k + i + N*j1];
-                s = Output[(N*M)*k + i + N*j2];
-                u = Output[(N*M)*k1 + i + N*j];
-                d = Output[(N*M)*k2 + i + N*j];
-                
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-                u1 = u - Output[index];
-                d1 = d - Output[index];
-                
-                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index])); 
-		}
-	} 
-
-__global__ void NonLinearDiff3D_kernel(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, int N, int M, int Z)
-    {
-		int i1,i2,j1,j2,k1,k2;
-		float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-		int j = blockDim.y * blockIdx.y + threadIdx.y;
-		int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-		int index = (N*M)*k + i + N*j;        
-        
-        if ((i >= 0) && (i < N) && (j >= 0) && (j < M) && (k >= 0) && (k < Z)) {
-            
-            /* boundary conditions (Neumann reflections) */
-			i1 = i+1; if (i1 == N) i1 = i-1;
-			i2 = i-1; if (i2 < 0) i2 = i+1;
-            j1 = j+1; if (j1 == M) j1 = j-1;
-            j2 = j-1; if (j2 < 0) j2 = j+1;
-			k1 = k+1; if (k1 == Z) k1 = k-1;
-			k2 = k-1; if (k2 < 0) k2 = k+1;
-            
-		        e = Output[(N*M)*k + i1 + N*j];
-                w = Output[(N*M)*k + i2 + N*j];
-                n = Output[(N*M)*k + i + N*j1];
-                s = Output[(N*M)*k + i + N*j2];
-                u = Output[(N*M)*k1 + i + N*j];
-                d = Output[(N*M)*k2 + i + N*j];
-                
-                e1 = e - Output[index];
-                w1 = w - Output[index];
-                n1 = n - Output[index];
-                s1 = s - Output[index];
-                u1 = u - Output[index];
-                d1 = d - Output[index];
-                
-                
-            if (penaltytype == 1){
-            /* Huber penalty */
-            if (abs(e1) > sigmaPar) e1 =  signNDF(e1);
-            else e1 = e1/sigmaPar;
-            
-            if (abs(w1) > sigmaPar) w1 =  signNDF(w1);
-            else w1 = w1/sigmaPar;
-            
-            if (abs(n1) > sigmaPar) n1 =  signNDF(n1);
-            else n1 = n1/sigmaPar;
-            
-            if (abs(s1) > sigmaPar) s1 =  signNDF(s1);
-            else s1 = s1/sigmaPar;
-            
-            if (abs(u1) > sigmaPar) u1 =  signNDF(u1);
-            else u1 = u1/sigmaPar;
-            
-            if (abs(d1) > sigmaPar) d1 =  signNDF(d1);
-            else d1 = d1/sigmaPar;            
-            }
-            else if (penaltytype == 2) {
-            /* Perona-Malik */
-            e1 = (e1)/(1.0f + pow((e1/sigmaPar),2));
-            w1 = (w1)/(1.0f + pow((w1/sigmaPar),2));
-            n1 = (n1)/(1.0f + pow((n1/sigmaPar),2));
-            s1 = (s1)/(1.0f + pow((s1/sigmaPar),2));
-            u1 = (u1)/(1.0f + pow((u1/sigmaPar),2));
-            d1 = (d1)/(1.0f + pow((d1/sigmaPar),2));
-            }
-            else if (penaltytype == 3) {
-            /* Tukey Biweight */
-            if (abs(e1) <= sigmaPar) e1 =  e1*pow((1.0f - pow((e1/sigmaPar),2)), 2);
-            else e1 = 0.0f;
-            if (abs(w1) <= sigmaPar) w1 =  w1*pow((1.0f - pow((w1/sigmaPar),2)), 2);
-            else w1 = 0.0f;
-            if (abs(n1) <= sigmaPar) n1 =  n1*pow((1.0f - pow((n1/sigmaPar),2)), 2);
-            else n1 = 0.0f;
-            if (abs(s1) <= sigmaPar) s1 =  s1*pow((1.0f - pow((s1/sigmaPar),2)), 2);
-            else s1 = 0.0f;
-            if (abs(u1) <= sigmaPar) u1 =  u1*pow((1.0f - pow((u1/sigmaPar),2)), 2);
-            else u1 = 0.0f;
-            if (abs(d1) <= sigmaPar) d1 =  d1*pow((1.0f - pow((d1/sigmaPar),2)), 2);
-            else d1 = 0.0f;
-            }
-            else printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
-
-            Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index])); 
-		}
-	} 
-
-/////////////////////////////////////////////////
-// HOST FUNCTION
-extern "C" int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z)
-{
-	    // set up device
-		int dev = 0;
-		CHECK(cudaSetDevice(dev));
-        float *d_input, *d_output;
-        float sigmaPar2;
-        sigmaPar2 = sigmaPar/sqrt(2.0f);
-        
-        CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float)));
-        CHECK(cudaMalloc((void**)&d_output,N*M*Z*sizeof(float)));
-                
-        CHECK(cudaMemcpy(d_input,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
-        CHECK(cudaMemcpy(d_output,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));      
-        
-	if (Z == 1) {
-	     /*2D case */ 
-        
-        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
-        dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
-             
-        for(int n=0; n < iterationsNumb; n++) {
-				if (sigmaPar == 0.0f) {
-				/* linear diffusion (heat equation) */
-				LinearDiff2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_output, lambdaPar, tau, N, M);
-				CHECK(cudaDeviceSynchronize());
-				}
-				else {
-				/* nonlinear diffusion */
-				NonLinearDiff2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_output, lambdaPar, sigmaPar2, tau, penaltytype, N, M);
-				CHECK(cudaDeviceSynchronize());
-				}
-        }
-	}
-	else {
-		/*3D case*/
-        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
-        dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKZSIZE));
-			for(int n=0; n < iterationsNumb; n++) {
-				if (sigmaPar == 0.0f) {
-				/* linear diffusion (heat equation) */
-				LinearDiff3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_output, lambdaPar, tau, N, M, Z);
-				CHECK(cudaDeviceSynchronize());
-				}
-				else {
-				/* nonlinear diffusion */
-				NonLinearDiff3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_output, lambdaPar, sigmaPar2, tau, penaltytype, N, M, Z);
-				CHECK(cudaDeviceSynchronize());
-				}
-			}
-        
-		}        
-        CHECK(cudaMemcpy(Output,d_output,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));
-        CHECK(cudaFree(d_input));
-        CHECK(cudaFree(d_output));
-        //cudaDeviceReset();
-        return 0;
-}
diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.h b/Core/regularisers_GPU/NonlDiff_GPU_core.h
deleted file mode 100644
index 5fe457e..0000000
--- a/Core/regularisers_GPU/NonlDiff_GPU_core.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __NonlDiffGPU_H__
-#define __NonlDiffGPU_H__
-#include "CCPiDefines.h"
-#include <stdio.h>
-
-extern "C" CCPI_EXPORT int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
-
-#endif 
diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/Core/regularisers_GPU/PatchSelect_GPU_core.cu
deleted file mode 100644
index 98c8488..0000000
--- a/Core/regularisers_GPU/PatchSelect_GPU_core.cu
+++ /dev/null
@@ -1,460 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd. 
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd. 
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "PatchSelect_GPU_core.h"
-#include "shared.h"
-
-/* CUDA implementation of non-local weight pre-calculation for non-local priors
- * Weights and associated indices are stored into pre-allocated arrays and passed
- * to the regulariser
- *
- *
- * Input Parameters:
- * 1. 2D grayscale image (classical 3D version will not be supported but rather 2D + dim extension (TODO))
- * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
- * 3. Similarity window (half-size of the patch window, e.g. 2)
- * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
- * 5. noise-related parameter to calculate non-local weights
- *
- * Output [2D]:
- * 1. AR_i - indeces of i neighbours
- * 2. AR_j - indeces of j neighbours
- * 3. Weights_ij - associated weights
- */
-
-
-#define BLKXSIZE 16
-#define BLKYSIZE 16
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-#define M_PI 3.14159265358979323846
-#define EPS 1.0e-8
-#define CONSTVECSIZE5 121
-#define CONSTVECSIZE7 225
-#define CONSTVECSIZE9 361
-#define CONSTVECSIZE11 529
-#define CONSTVECSIZE13 729
-
-__device__ void swap(float *xp, float *yp) 
-{
-    float temp = *xp; 
-    *xp = *yp; 
-    *yp = temp; 
-}
-__device__ void swapUS(unsigned short *xp, unsigned short *yp) 
-{ 
-    unsigned short temp = *xp; 
-    *xp = *yp; 
-    *yp = temp; 
-}
-
-/********************************************************************************/
-__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{          
-
-    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
-    float normsum;
-    
-    float Weight_Vec[CONSTVECSIZE5];
-    unsigned short ind_i[CONSTVECSIZE5];
-    unsigned short ind_j[CONSTVECSIZE5];
-
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    long index = i*M+j;      
-    
-    counter = 0;
-    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
-        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
-            i1 = i+i_m;
-            j1 = j+j_m;
-              if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
-                normsum = 0.0f; counterG = 0;
-                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
-                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
-                        i2 = i1 + i_c;
-                        j2 = j1 + j_c;
-                        i3 = i + i_c;
-                        j3 = j + j_c;
-                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
-                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
-                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);                                
-                                counterG++;
-                            }}                        
-                     }}
-                /* writing temporarily into vectors */
-                if (normsum > EPS) {
-                    Weight_Vec[counter] = __expf(-normsum/h2);
-                    ind_i[counter] = i1;
-                    ind_j[counter] = j1;
-                    counter++;
-                }
-             }
-        }}
-        
-    /* do sorting to choose the most prominent weights [HIGH to LOW] */
-    /* and re-arrange indeces accordingly */
-    for (x = 0; x < counter-1; x++)  {
-       for (y = 0; y < counter-x-1; y++)  {
-           if (Weight_Vec[y] < Weight_Vec[y+1]) {
-            swap(&Weight_Vec[y], &Weight_Vec[y+1]); 		                       
-            swapUS(&ind_i[y], &ind_i[y+1]);
-            swapUS(&ind_j[y], &ind_j[y+1]);  
-            }
-    	}
-    }     
-    /*sorting loop finished*/        
-    /*now select the NumNeighb more prominent weights and store into arrays */     
-    for(x=0; x < NumNeighb; x++) {
-        index2 = (N*M*x) + index;
-        H_i_d[index2] = ind_i[x];
-        H_j_d[index2] = ind_j[x];
-        Weights_d[index2] = Weight_Vec[x];
-    }
-} 
-/********************************************************************************/
-__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{          
-
-    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
-    float normsum;
-    
-    float Weight_Vec[CONSTVECSIZE7];
-    unsigned short ind_i[CONSTVECSIZE7];
-    unsigned short ind_j[CONSTVECSIZE7];
-
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    long index = i*M+j;      
-    
-    counter = 0;
-    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
-        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
-            i1 = i+i_m;
-            j1 = j+j_m;
-             if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
-                normsum = 0.0f; counterG = 0;
-                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
-                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
-                        i2 = i1 + i_c;
-                        j2 = j1 + j_c;
-                        i3 = i + i_c;
-                        j3 = j + j_c;
-                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
-                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
-                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);                                
-                                counterG++;
-                            }}                        
-                     }}
-                /* writing temporarily into vectors */
-                if (normsum > EPS) {
-                    Weight_Vec[counter] = __expf(-normsum/h2);
-                    ind_i[counter] = i1;
-                    ind_j[counter] = j1;
-                    counter++;
-                }
-             }
-        }}
-        
-    /* do sorting to choose the most prominent weights [HIGH to LOW] */
-    /* and re-arrange indeces accordingly */
-    for (x = 0; x < counter-1; x++)  {
-       for (y = 0; y < counter-x-1; y++)  {
-           if (Weight_Vec[y] < Weight_Vec[y+1]) {
-            swap(&Weight_Vec[y], &Weight_Vec[y+1]); 		                       
-            swapUS(&ind_i[y], &ind_i[y+1]);
-            swapUS(&ind_j[y], &ind_j[y+1]);  
-            }
-    	}
-    }     
-    /*sorting loop finished*/        
-    /*now select the NumNeighb more prominent weights and store into arrays */     
-    for(x=0; x < NumNeighb; x++) {
-        index2 = (N*M*x) + index;
-        H_i_d[index2] = ind_i[x];
-        H_j_d[index2] = ind_j[x];
-        Weights_d[index2] = Weight_Vec[x];
-    }
-}
-__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{          
-
-    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
-    float normsum;
-  
-    float Weight_Vec[CONSTVECSIZE9];
-    unsigned short ind_i[CONSTVECSIZE9];
-    unsigned short ind_j[CONSTVECSIZE9];
-
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    long index = i*M+j;      
-    
-    counter = 0;
-    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
-        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
-            i1 = i+i_m;
-            j1 = j+j_m;
-            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
-                normsum = 0.0f; counterG = 0;
-                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
-                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
-                        i2 = i1 + i_c;
-                        j2 = j1 + j_c;
-                        i3 = i + i_c;
-                        j3 = j + j_c;
-                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
-                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
-                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
-                                counterG++;
-                            }}                        
-                     }}
-                /* writing temporarily into vectors */
-                if (normsum > EPS) {
-                    Weight_Vec[counter] = expf(-normsum/h2);
-                    ind_i[counter] = i1;
-                    ind_j[counter] = j1;
-                    counter++;
-                }
-            }
-        }}
-        
-    /* do sorting to choose the most prominent weights [HIGH to LOW] */
-    /* and re-arrange indeces accordingly */
-    for (x = 0; x < counter-1; x++)  {
-       for (y = 0; y < counter-x-1; y++)  {
-           if (Weight_Vec[y] < Weight_Vec[y+1]) {
-            swap(&Weight_Vec[y], &Weight_Vec[y+1]); 		                       
-            swapUS(&ind_i[y], &ind_i[y+1]);
-            swapUS(&ind_j[y], &ind_j[y+1]);  
-            }
-    	}
-    }     
-    /*sorting loop finished*/        
-    /*now select the NumNeighb more prominent weights and store into arrays */     
-    for(x=0; x < NumNeighb; x++) {
-        index2 = (N*M*x) + index;
-        H_i_d[index2] = ind_i[x];
-        H_j_d[index2] = ind_j[x];
-        Weights_d[index2] = Weight_Vec[x];
-    }                     
-}
-__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{          
-
-    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
-    float normsum;
-    
-    float Weight_Vec[CONSTVECSIZE11];
-    unsigned short ind_i[CONSTVECSIZE11];
-    unsigned short ind_j[CONSTVECSIZE11];
-
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    long index = i*M+j;      
-    
-    counter = 0;
-    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
-        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
-            i1 = i+i_m;
-            j1 = j+j_m;
-            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
-                normsum = 0.0f; counterG = 0;
-                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
-                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
-                        i2 = i1 + i_c;
-                        j2 = j1 + j_c;
-                        i3 = i + i_c;
-                        j3 = j + j_c;
-                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
-                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
-                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);                                
-                                counterG++;
-                            }}                        
-                     }}
-                /* writing temporarily into vectors */
-                if (normsum > EPS) {
-                    Weight_Vec[counter] = __expf(-normsum/h2);
-                    ind_i[counter] = i1;
-                    ind_j[counter] = j1;
-                    counter++;
-                }
-           }
-        }}
-        
-    /* do sorting to choose the most prominent weights [HIGH to LOW] */
-    /* and re-arrange indeces accordingly */
-    for (x = 0; x < counter-1; x++)  {
-       for (y = 0; y < counter-x-1; y++)  {
-           if (Weight_Vec[y] < Weight_Vec[y+1]) {
-            swap(&Weight_Vec[y], &Weight_Vec[y+1]); 		                       
-            swapUS(&ind_i[y], &ind_i[y+1]);
-            swapUS(&ind_j[y], &ind_j[y+1]);  
-            }
-    	}
-    }     
-    /*sorting loop finished*/        
-    /*now select the NumNeighb more prominent weights and store into arrays */     
-    for(x=0; x < NumNeighb; x++) {
-        index2 = (N*M*x) + index;
-        H_i_d[index2] = ind_i[x];
-        H_j_d[index2] = ind_j[x];
-        Weights_d[index2] = Weight_Vec[x];
-    }
-} 
-__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{          
-
-    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
-    float normsum;
-    
-    float Weight_Vec[CONSTVECSIZE13];
-    unsigned short ind_i[CONSTVECSIZE13];
-    unsigned short ind_j[CONSTVECSIZE13];
-
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    long index = i*M+j;      
-    
-    counter = 0;
-    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
-        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
-            i1 = i+i_m;
-            j1 = j+j_m;
-            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
-                normsum = 0.0f; counterG = 0;
-                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
-                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
-                        i2 = i1 + i_c;
-                        j2 = j1 + j_c;
-                        i3 = i + i_c;
-                        j3 = j + j_c;
-                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
-                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
-                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);                                
-                                counterG++;
-                            }}                        
-                     }}
-                /* writing temporarily into vectors */
-                if (normsum > EPS) {
-                    Weight_Vec[counter] = __expf(-normsum/h2);
-                    ind_i[counter] = i1;
-                    ind_j[counter] = j1;
-                    counter++;
-                }
-             }
-        }}
-        
-    /* do sorting to choose the most prominent weights [HIGH to LOW] */
-    /* and re-arrange indeces accordingly */
-    for (x = 0; x < counter-1; x++)  {
-       for (y = 0; y < counter-x-1; y++)  {
-           if (Weight_Vec[y] < Weight_Vec[y+1]) {
-            swap(&Weight_Vec[y], &Weight_Vec[y+1]); 		                       
-            swapUS(&ind_i[y], &ind_i[y+1]);
-            swapUS(&ind_j[y], &ind_j[y+1]);  
-            }
-    	}
-    }     
-    /*sorting loop finished*/        
-    /*now select the NumNeighb more prominent weights and store into arrays */     
-    for(x=0; x < NumNeighb; x++) {
-        index2 = (N*M*x) + index;
-        H_i_d[index2] = ind_i[x];
-        H_j_d[index2] = ind_j[x];
-        Weights_d[index2] = Weight_Vec[x];
-    }
-} 
-
-   
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-/********************* MAIN HOST FUNCTION ******************/
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
-{
-    int deviceCount = -1; // number of devices
-    cudaGetDeviceCount(&deviceCount);
-    if (deviceCount == 0) {
-        fprintf(stderr, "No CUDA devices found\n");
-        return -1;
-    }  
-      
-    int SearchW_full, SimilW_full, counterG, i, j;
-    float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d;    
-    unsigned short *H_i_d, *H_j_d;
-    h2 = h*h;
-    
-    dim3 dimBlock(BLKXSIZE,BLKYSIZE);
-    dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));    
-       
-    SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window  size */
-    SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1);   /* the full similarity window  size */
-    
-    /* generate a 2D Gaussian kernel for NLM procedure */
-    Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float));
-    counterG = 0;
-    for(i=-SimilarWin; i<=SimilarWin; i++) {
-         for(j=-SimilarWin; j<=SimilarWin; j++) {
-              Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2))/(2.0*SimilarWin*SimilarWin));
-              counterG++;
-    }} /*main neighb loop */
-    
-    
-    /*allocate space on the device*/
-    checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );
-    checkCudaErrors( cudaMalloc((void**)&H_i_d, N*M*NumNeighb*sizeof(unsigned short)) );
-    checkCudaErrors( cudaMalloc((void**)&H_j_d, N*M*NumNeighb*sizeof(unsigned short)) );
-    checkCudaErrors( cudaMalloc((void**)&Weights_d, N*M*NumNeighb*sizeof(float)) );
-    checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) );
-
-    /* copy data from the host to the device */
-    checkCudaErrors( cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );    
- 
-    /********************** Run CUDA kernel here ********************/
-    if (SearchWindow == 5)  IndexSelect2D_5_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
-    else if (SearchWindow == 7)  IndexSelect2D_7_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
-    else if (SearchWindow == 9)  IndexSelect2D_9_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
-    else if (SearchWindow == 11)  IndexSelect2D_11_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
-    else if (SearchWindow == 13)  IndexSelect2D_13_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
-    else {
-    fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
-        return -1;}    
-    checkCudaErrors(cudaPeekAtLastError() );        
-    checkCudaErrors(cudaDeviceSynchronize());   
-    /***************************************************************/    
-        
-    checkCudaErrors(cudaMemcpy(H_i, H_i_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
-    checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );  
-    checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) );   
-    
-    
-    cudaFree(Ad); 
-    cudaFree(H_i_d); 
-    cudaFree(H_j_d);    
-    cudaFree(Weights_d);
-    cudaFree(Eucl_Vec_d);
-    cudaDeviceReset();
-    return 0;
-}
diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.h b/Core/regularisers_GPU/PatchSelect_GPU_core.h
deleted file mode 100644
index 8c124d3..0000000
--- a/Core/regularisers_GPU/PatchSelect_GPU_core.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Public interface of the GPU patch-selection routine (C linkage). */
-#ifndef __NLREG_KERNELS_H_
-#define __NLREG_KERNELS_H_
-#include "CCPiDefines.h"
-#include <stdio.h>
-
-/* Host entry point of the GPU patch selection.
- *
- * As far as the implementation shows, on completion N*M*NumNeighb entries are
- * copied from the device into each of the host arrays H_i, H_j (unsigned short)
- * and Weights (float).
- *
- * Returns 0 on success and -1 (after printing a message to stderr) when
- * SearchWindow is not one of 5, 7, 9, 11 or 13.
- * NOTE(review): the meaning of A, SimilarWin and h is not documented here --
- * confirm against the implementation file.
- */
-extern "C" CCPI_EXPORT int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
-
-#endif 
diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu
deleted file mode 100644
index 58b2c41..0000000
--- a/Core/regularisers_GPU/TGV_GPU_core.cu
+++ /dev/null
@@ -1,625 +0,0 @@
- /*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/ 
-
-#include "TGV_GPU_core.h"
-#include "shared.h"
-
-/* CUDA implementation of Primal-Dual denoising method for 
- * Total Generalized Variation (TGV)-L2 model [1] (2D/3D case)
- *
- * Input Parameters:
- * 1. Noisy image/volume (2D/3D)
- * 2. lambda - regularisation parameter
- * 3. parameter to control the first-order term (alpha1)
- * 4. parameter to control the second-order term (alpha0)
- * 5. Number of Chambolle-Pock (Primal-Dual) iterations
- * 6. Lipschitz constant (default is 12)
- *
- * Output:
- * Filtered/regularised image
- *
- * References:
- * [1] K. Bredies "Total Generalized Variation"
- */
-    
-/* Thread-block extents used by the host function for the 3D kernel launches */
-#define BLKXSIZE 8
-#define BLKYSIZE 8
-#define BLKZSIZE 8    
-    
-/* Thread-block extents used by the host function for the 2D kernel launches */
-#define BLKXSIZE2D 16
-#define BLKYSIZE2D 16
-/* Small constant; NOTE(review): no use of EPS is visible in the kernels of this file section */
-#define EPS 1.0e-7
-/* Integer division rounded up: number of blocks of size b needed to cover a elements */
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-
-
-/********************************************************************/
-/***************************2D Functions*****************************/
-/********************************************************************/
-__global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, float sigma)
-{
-    /* Dual update of P (2D), one thread per pixel:
-     *   P1 += sigma*((U[x-neighbour] - U) - V1),  P2 += sigma*((U[y-neighbour] - U) - V2).
-     * The neighbour is the next pixel along the axis; on the last column/row
-     * the previous pixel is read instead (the original code labels this a
-     * symmetric/Neumann boundary condition). */
-    const int col = blockDim.x * blockIdx.x + threadIdx.x;
-    const int row = blockDim.y * blockIdx.y + threadIdx.y;
-
-    /* threads of the launch grid that fall outside the image have nothing to do */
-    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY)) return;
-
-    const int here = col + dimX*row;
-    const int nbX = (col == dimX-1) ? (here - 1) : (here + 1);
-    const int nbY = (row == dimY-1) ? (here - dimX) : (here + dimX);
-
-    P1[here] += sigma*((U[nbX] - U[here]) - V1[here]);
-    P2[here] += sigma*((U[nbY] - U[here]) - V2[here]);
-}
-
-__global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float alpha1)
-{
-    /* Per-pixel projection of the dual vector (P1,P2): when its Euclidean
-     * length divided by alpha1 exceeds 1, both components are divided by
-     * that ratio; otherwise they are left untouched. */
-    const int col = blockDim.x * blockIdx.x + threadIdx.x;
-    const int row = blockDim.y * blockIdx.y + threadIdx.y;
-
-    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY)) return;
-
-    const int here = col + dimX*row;
-    const float p1 = P1[here];
-    const float p2 = P2[here];
-
-    float ratio = sqrt(pow(p1,2) + pow(p2,2));
-    ratio = ratio/alpha1;
-
-    if (ratio > 1.0f) {
-        P1[here] = p1/ratio;
-        P2[here] = p2/ratio;
-    }
-}
-
-__global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float sigma)
-{
-    /* Dual update of Q (2D) from forward differences of the field (V1,V2):
-     *   Q1 += sigma * dx(V1)
-     *   Q2 += sigma * dy(V2)
-     *   Q3 += sigma * 0.5*(dx(V2) + dy(V1))
-     * A difference is taken as zero on the last column (dx) / last row (dy). */
-    const int col = blockDim.x * blockIdx.x + threadIdx.x;
-    const int row = blockDim.y * blockIdx.y + threadIdx.y;
-
-    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY)) return;
-
-    const int here = col + dimX*row;
-    const bool hasNextX = (col != dimX-1);
-    const bool hasNextY = (row != dimY-1);
-
-    const float dxV1 = hasNextX ? (V1[here + 1] - V1[here]) : 0.0f;
-    const float dxV2 = hasNextX ? (V2[here + 1] - V2[here]) : 0.0f;
-    const float dyV2 = hasNextY ? (V2[here + dimX] - V2[here]) : 0.0f;
-    const float dyV1 = hasNextY ? (V1[here + dimX] - V1[here]) : 0.0f;
-
-    Q1[here] += sigma*(dxV1);
-    Q2[here] += sigma*(dyV2);
-    Q3[here] += sigma*(0.5f*(dxV2 + dyV1));
-}
-
-__global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int dimY, float alpha0)
-{
-    /* Per-pixel projection of (Q1,Q2,Q3): the magnitude counts Q3 twice
-     * (sqrt(Q1^2 + Q2^2 + 2*Q3^2)); when magnitude/alpha0 exceeds 1 all three
-     * components are divided by that ratio. */
-    const int col = blockDim.x * blockIdx.x + threadIdx.x;
-    const int row = blockDim.y * blockIdx.y + threadIdx.y;
-
-    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY)) return;
-
-    const int here = col + dimX*row;
-    const float q1 = Q1[here];
-    const float q2 = Q2[here];
-    const float q3 = Q3[here];
-
-    float ratio = sqrt(pow(q1,2) + pow(q2,2) + 2*pow(q3,2));
-    ratio = ratio/alpha0;
-
-    if (ratio > 1.0f) {
-        Q1[here] = q1/ratio;
-        Q2[here] = q2/ratio;
-        Q3[here] = q3/ratio;
-    }
-}
-
-__global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, int dimX, int dimY, float lambda, float tau)
-{
-    /* Primal update of U (2D): takes the backward-difference divergence of
-     * (P1,P2) and blends the result with the input U0:
-     *   U = (lambda*(U + tau*div) + tau*U0) / (lambda + tau).
-     * In the first column/row the backward difference is just P itself. */
-    const int col = blockDim.x * blockIdx.x + threadIdx.x;
-    const int row = blockDim.y * blockIdx.y + threadIdx.y;
-
-    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY)) return;
-
-    const int here = col + dimX*row;
-    const float dP1 = (col == 0) ? P1[here] : (P1[here] - P1[here - 1]);
-    const float dP2 = (row == 0) ? P2[here] : (P2[here] - P2[here - dimX]);
-    const float divP = dP1 + dP2;
-
-    U[here] = (lambda*(U[here] + tau*divP) + tau*U0[here])/(lambda + tau);
-}
-
-__global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float tau)
-{
-    /* Update of the field (V1,V2) in 2D:
-     *   V1 += tau*(P1 + dx(Q1) + dy(Q3)),  V2 += tau*(P2 + dx(Q3) + dy(Q2)),
-     * with backward differences that are taken as zero in the first
-     * column (dx) / first row (dy). */
-    const int col = blockDim.x * blockIdx.x + threadIdx.x;
-    const int row = blockDim.y * blockIdx.y + threadIdx.y;
-
-    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY)) return;
-
-    const int here = col + dimX*row;
-    const bool hasPrevX = (col != 0);
-    const bool hasPrevY = (row != 0);
-
-    const float dxQ1 = hasPrevX ? (Q1[here] - Q1[here - 1]) : 0.0f;
-    const float dxQ3 = hasPrevX ? (Q3[here] - Q3[here - 1]) : 0.0f;
-    const float dyQ2 = hasPrevY ? (Q2[here] - Q2[here - dimX]) : 0.0f;
-    const float dyQ3 = hasPrevY ? (Q3[here] - Q3[here - dimX]) : 0.0f;
-
-    const float divRow1 = dxQ1 + dyQ3;
-    const float divRow2 = dxQ3 + dyQ2;
-
-    V1[here] += tau*(P1[here] + divRow1);
-    V2[here] += tau*(P2[here] + divRow2);
-}
-
-__global__ void copyIm_TGV_kernel(float *U, float *U_old, int N, int M, int num_total)
-{
-    /* Copies the N x M image U into U_old, one thread per pixel.
-     *
-     * The launch grid is rounded up to whole blocks, so some threads have
-     * xIndex >= N. Their linear index xIndex + N*yIndex still lies below
-     * num_total and aliases a pixel of a following row; guarding on the linear
-     * index alone therefore let several threads write the same element.
-     * Checking the 2D coordinates keeps exactly one thread per pixel. */
-    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-
-    if ((xIndex >= N) || (yIndex >= M)) return;
-
-    const int index = xIndex + N*yIndex;
-
-    if (index < num_total) {
-        U_old[index] = U[index];
-    }
-}
-
-__global__ void copyIm_TGV_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total)
-{
-    /* Copies the two N x M arrays V1, V2 into V1_old, V2_old, one thread per pixel.
-     *
-     * Threads of the rounded-up launch grid with xIndex >= N would otherwise
-     * alias pixels of a following row (their linear index is still below
-     * num_total) and duplicate the writes; the coordinate check removes them. */
-    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-
-    if ((xIndex >= N) || (yIndex >= M)) return;
-
-    const int index = xIndex + N*yIndex;
-
-    if (index < num_total) {
-        V1_old[index] = V1[index];
-        V2_old[index] = V2[index];
-    }
-}
-
-__global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total)
-{
-    /* Extrapolation step on an N x M image: U = 2*U - U_old, one thread per pixel.
-     *
-     * This is a read-modify-write of U, so each element must be handled by
-     * exactly one thread. With only the "index < num_total" guard, threads of
-     * the rounded-up launch grid with xIndex >= N mapped onto pixels of a
-     * following row, and an element could be extrapolated twice whenever N is
-     * not a multiple of the block width. The coordinate check prevents that. */
-    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-
-    if ((xIndex >= N) || (yIndex >= M)) return;
-
-    const int index = xIndex + N*yIndex;
-
-    if (index < num_total) {
-        U[index] = 2.0f*U[index] - U_old[index];
-    }
-}
-
-
-__global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total)
-{
-    /* Extrapolation step on two N x M arrays: V = 2*V - V_old for V1 and V2.
-     *
-     * Each element is read and rewritten in place, so it must be touched by a
-     * single thread. Threads of the rounded-up launch grid with xIndex >= N
-     * used to alias pixels of a following row (their linear index is below
-     * num_total), which could apply the update twice; they now return early. */
-    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-
-    if ((xIndex >= N) || (yIndex >= M)) return;
-
-    const int index = xIndex + N*yIndex;
-
-    if (index < num_total) {
-        V1[index] = 2.0f*V1[index] - V1_old[index];
-        V2[index] = 2.0f*V2[index] - V2_old[index];
-    }
-}
-/********************************************************************/
-/***************************3D Functions*****************************/
-/********************************************************************/
-__global__ void DualP_3D_kernel(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma)
-{
-    /* Dual update of P (3D), one thread per voxel:
-     *   Pn += sigma*((U[neighbour along axis n] - U) - Vn),  n = 1,2,3.
-     * The neighbour is the next voxel along the axis; on the last index of an
-     * axis the previous voxel is read instead (labelled a symmetric/Neumann
-     * boundary condition in the original code). */
-    const int col = blockDim.x * blockIdx.x + threadIdx.x;
-    const int row = blockDim.y * blockIdx.y + threadIdx.y;
-    const int sl  = blockDim.z * blockIdx.z + threadIdx.z;
-
-    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY) || (sl < 0) || (sl >= dimZ)) return;
-
-    const int slice = dimX*dimY;
-    const int here = slice*sl + row*dimX + col;
-
-    const int nbX = (col == dimX-1) ? (here - 1) : (here + 1);
-    const int nbY = (row == dimY-1) ? (here - dimX) : (here + dimX);
-    const int nbZ = (sl == dimZ-1) ? (here - slice) : (here + slice);
-
-    P1[here] += sigma*((U[nbX] - U[here]) - V1[here]);
-    P2[here] += sigma*((U[nbY] - U[here]) - V2[here]);
-    P3[here] += sigma*((U[nbZ] - U[here]) - V3[here]);
-}
-
-__global__ void ProjP_3D_kernel(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float alpha1)
-{
-    /* Per-voxel projection of (P1,P2,P3): when the Euclidean length divided
-     * by alpha1 exceeds 1, all three components are divided by that ratio. */
-    const int col = blockDim.x * blockIdx.x + threadIdx.x;
-    const int row = blockDim.y * blockIdx.y + threadIdx.y;
-    const int sl  = blockDim.z * blockIdx.z + threadIdx.z;
-
-    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY) || (sl < 0) || (sl >= dimZ)) return;
-
-    const int here = (dimX*dimY)*sl + row*dimX + col;
-    const float p1 = P1[here];
-    const float p2 = P2[here];
-    const float p3 = P3[here];
-
-    const float ratio = (sqrtf(pow(p1,2) + pow(p2,2) + pow(p3,2)))/alpha1;
-
-    if (ratio > 1.0f) {
-        P1[here] = p1/ratio;
-        P2[here] = p2/ratio;
-        P3[here] = p3/ratio;
-    }
-}
-
-__global__ void DualQ_3D_kernel(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float sigma)
-{
-    /* Dual update of Q (3D) from forward differences of the field (V1,V2,V3):
-     *   Q1 += sigma*dx(V1)                    (Q11)
-     *   Q2 += sigma*dy(V2)                    (Q22)
-     *   Q3 += sigma*dz(V3)                    (Q33)
-     *   Q4 += sigma*0.5*(dx(V2) + dy(V1))     (Q21 / Q12)
-     *   Q5 += sigma*0.5*(dx(V3) + dz(V1))     (Q31 / Q13)
-     *   Q6 += sigma*0.5*(dy(V3) + dz(V2))     (Q32 / Q23)
-     * A difference is taken as zero on the last index of its axis. */
-    const int col = blockDim.x * blockIdx.x + threadIdx.x;
-    const int row = blockDim.y * blockIdx.y + threadIdx.y;
-    const int sl  = blockDim.z * blockIdx.z + threadIdx.z;
-
-    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY) || (sl < 0) || (sl >= dimZ)) return;
-
-    const int slice = dimX*dimY;
-    const int here = slice*sl + row*dimX + col;
-
-    const bool hasNextX = (col != dimX-1);
-    const bool hasNextY = (row != dimY-1);
-    const bool hasNextZ = (sl != dimZ-1);
-
-    /* differences along x */
-    const float dxV1 = hasNextX ? (V1[here + 1] - V1[here]) : 0.0f;
-    const float dxV2 = hasNextX ? (V2[here + 1] - V2[here]) : 0.0f;
-    const float dxV3 = hasNextX ? (V3[here + 1] - V3[here]) : 0.0f;
-    /* differences along y */
-    const float dyV2 = hasNextY ? (V2[here + dimX] - V2[here]) : 0.0f;
-    const float dyV1 = hasNextY ? (V1[here + dimX] - V1[here]) : 0.0f;
-    const float dyV3 = hasNextY ? (V3[here + dimX] - V3[here]) : 0.0f;
-    /* differences along z */
-    const float dzV3 = hasNextZ ? (V3[here + slice] - V3[here]) : 0.0f;
-    const float dzV1 = hasNextZ ? (V1[here + slice] - V1[here]) : 0.0f;
-    const float dzV2 = hasNextZ ? (V2[here + slice] - V2[here]) : 0.0f;
-
-    Q1[here] += sigma*(dxV1);
-    Q2[here] += sigma*(dyV2);
-    Q3[here] += sigma*(dzV3);
-    Q4[here] += sigma*(0.5f*(dxV2 + dyV1));
-    Q5[here] += sigma*(0.5f*(dxV3 + dzV1));
-    Q6[here] += sigma*(0.5f*(dyV3 + dzV2));
-}
-
-
-__global__ void ProjQ_3D_kernel(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float alpha0)
-{
-    /* Per-voxel projection of (Q1..Q6): the magnitude counts Q4, Q5 and Q6
-     * twice each; when magnitude/alpha0 exceeds 1 all six components are
-     * divided by that ratio. */
-    const int col = blockDim.x * blockIdx.x + threadIdx.x;
-    const int row = blockDim.y * blockIdx.y + threadIdx.y;
-    const int sl  = blockDim.z * blockIdx.z + threadIdx.z;
-
-    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY) || (sl < 0) || (sl >= dimZ)) return;
-
-    const int here = (dimX*dimY)*sl + row*dimX + col;
-    const float q1 = Q1[here];
-    const float q2 = Q2[here];
-    const float q3 = Q3[here];
-    const float q4 = Q4[here];
-    const float q5 = Q5[here];
-    const float q6 = Q6[here];
-
-    float ratio = sqrtf(pow(q1,2) + pow(q2,2) + pow(q3,2) + 2.0f*pow(q4,2) + 2.0f*pow(q5,2) + 2.0f*pow(q6,2));
-    ratio = ratio/alpha0;
-
-    if (ratio > 1.0f) {
-        Q1[here] = q1/ratio;
-        Q2[here] = q2/ratio;
-        Q3[here] = q3/ratio;
-        Q4[here] = q4/ratio;
-        Q5[here] = q5/ratio;
-        Q6[here] = q6/ratio;
-    }
-}
-__global__ void DivProjP_3D_kernel(float *U, float *U0, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float lambda, float tau)
-{
-    /* Primal update of U (3D): takes the backward-difference divergence of
-     * (P1,P2,P3) and blends the result with the input U0:
-     *   U = (lambda*(U + tau*div) + tau*U0) / (lambda + tau).
-     * On the first index of an axis the backward difference is just P itself. */
-    const int col = blockDim.x * blockIdx.x + threadIdx.x;
-    const int row = blockDim.y * blockIdx.y + threadIdx.y;
-    const int sl  = blockDim.z * blockIdx.z + threadIdx.z;
-
-    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY) || (sl < 0) || (sl >= dimZ)) return;
-
-    const int slice = dimX*dimY;
-    const int here = slice*sl + row*dimX + col;
-
-    const float dP1 = (col == 0) ? P1[here] : (P1[here] - P1[here - 1]);
-    const float dP2 = (row == 0) ? P2[here] : (P2[here] - P2[here - dimX]);
-    const float dP3 = (sl == 0) ? P3[here] : (P3[here] - P3[here - slice]);
-
-    const float divP = dP1 + dP2 + dP3;
-    U[here] = (lambda*(U[here] + tau*divP) + tau*U0[here])/(lambda + tau);
-}
-__global__ void UpdV_3D_kernel(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float tau)
-{
-    /* Update of the field (V1,V2,V3) in 3D:
-     *   V1 += tau*(P1 + dx(Q1) + dy(Q4) + dz(Q5))
-     *   V2 += tau*(P2 + dx(Q4) + dy(Q2) + dz(Q6))
-     *   V3 += tau*(P3 + dx(Q5) + dy(Q6) + dz(Q3))
-     * where Q1..Q3 hold Q11, Q22, Q33 and Q4..Q6 hold Q21/Q12, Q31/Q13,
-     * Q32/Q23. Backward differences are taken as zero on the first index of
-     * their axis. */
-    const int col = blockDim.x * blockIdx.x + threadIdx.x;
-    const int row = blockDim.y * blockIdx.y + threadIdx.y;
-    const int sl  = blockDim.z * blockIdx.z + threadIdx.z;
-
-    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY) || (sl < 0) || (sl >= dimZ)) return;
-
-    const int slice = dimX*dimY;
-    const int here = slice*sl + row*dimX + col;
-
-    const bool hasPrevX = (col != 0);
-    const bool hasPrevY = (row != 0);
-    const bool hasPrevZ = (sl != 0);
-
-    /* backward differences along x */
-    const float dxQ1 = hasPrevX ? (Q1[here] - Q1[here - 1]) : 0.0f;
-    const float dxQ4 = hasPrevX ? (Q4[here] - Q4[here - 1]) : 0.0f;
-    const float dxQ5 = hasPrevX ? (Q5[here] - Q5[here - 1]) : 0.0f;
-    /* backward differences along y */
-    const float dyQ2 = hasPrevY ? (Q2[here] - Q2[here - dimX]) : 0.0f;
-    const float dyQ4 = hasPrevY ? (Q4[here] - Q4[here - dimX]) : 0.0f;
-    const float dyQ6 = hasPrevY ? (Q6[here] - Q6[here - dimX]) : 0.0f;
-    /* backward differences along z */
-    const float dzQ6 = hasPrevZ ? (Q6[here] - Q6[here - slice]) : 0.0f;
-    const float dzQ5 = hasPrevZ ? (Q5[here] - Q5[here - slice]) : 0.0f;
-    const float dzQ3 = hasPrevZ ? (Q3[here] - Q3[here - slice]) : 0.0f;
-
-    const float divRow1 = dxQ1 + dyQ4 + dzQ5;
-    const float divRow2 = dxQ4 + dyQ2 + dzQ6;
-    const float divRow3 = dxQ5 + dyQ6 + dzQ3;
-
-    V1[here] += tau*(P1[here] + divRow1);
-    V2[here] += tau*(P2[here] + divRow2);
-    V3[here] += tau*(P3[here] + divRow3);
-}
-
-__global__ void copyIm_TGV_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total)
-{
-    /* Copies the dimX x dimY x dimZ volume U into U_old, one thread per voxel.
-     *
-     * The launch grid is rounded up to whole blocks. A thread whose i or j
-     * lies outside the volume can still produce a linear index below
-     * num_total that belongs to another voxel, so the original
-     * "index < num_total" guard let several threads write the same element.
-     * The coordinate check keeps exactly one thread per voxel. */
-    const int i = blockDim.x * blockIdx.x + threadIdx.x;
-    const int j = blockDim.y * blockIdx.y + threadIdx.y;
-    const int k = blockDim.z * blockIdx.z + threadIdx.z;
-
-    if ((i >= dimX) || (j >= dimY) || (k >= dimZ)) return;
-
-    const int index = (dimX*dimY)*k + j*dimX+i;
-
-    if (index < num_total) {
-        U_old[index] = U[index];
-    }
-}
-
-__global__ void copyIm_TGV_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total)
-{
-    /* Copies the three dimX x dimY x dimZ arrays V1, V2, V3 into their *_old
-     * counterparts, one thread per voxel.
-     *
-     * Threads of the rounded-up launch grid whose coordinates lie outside the
-     * volume could otherwise alias other voxels through the linear index and
-     * duplicate the writes; they return early. */
-    const int i = blockDim.x * blockIdx.x + threadIdx.x;
-    const int j = blockDim.y * blockIdx.y + threadIdx.y;
-    const int k = blockDim.z * blockIdx.z + threadIdx.z;
-
-    if ((i >= dimX) || (j >= dimY) || (k >= dimZ)) return;
-
-    const int index = (dimX*dimY)*k + j*dimX+i;
-
-    if (index < num_total) {
-        V1_old[index] = V1[index];
-        V2_old[index] = V2[index];
-        V3_old[index] = V3[index];
-    }
-}
-
-__global__ void newU_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total)
-{
-    /* Extrapolation step on a dimX x dimY x dimZ volume: U = 2*U - U_old.
-     *
-     * U is read and rewritten in place, so each element must be handled by a
-     * single thread. With only the "index < num_total" guard, threads of the
-     * rounded-up launch grid whose i or j lies outside the volume mapped onto
-     * other voxels and could apply the update a second time whenever a
-     * dimension is not a multiple of the block size. They now return early. */
-    const int i = blockDim.x * blockIdx.x + threadIdx.x;
-    const int j = blockDim.y * blockIdx.y + threadIdx.y;
-    const int k = blockDim.z * blockIdx.z + threadIdx.z;
-
-    if ((i >= dimX) || (j >= dimY) || (k >= dimZ)) return;
-
-    const int index = (dimX*dimY)*k + j*dimX+i;
-
-    if (index < num_total) {
-        U[index] = 2.0f*U[index] - U_old[index];
-    }
-}
-
-__global__ void newU_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total)
-{
-    /* Extrapolation step on three dimX x dimY x dimZ arrays:
-     * V = 2*V - V_old for V1, V2 and V3.
-     *
-     * The update is an in-place read-modify-write, so each element must be
-     * touched by exactly one thread. Threads of the rounded-up launch grid
-     * whose coordinates lie outside the volume used to alias other voxels via
-     * the linear index and could apply the update twice; they return early. */
-    const int i = blockDim.x * blockIdx.x + threadIdx.x;
-    const int j = blockDim.y * blockIdx.y + threadIdx.y;
-    const int k = blockDim.z * blockIdx.z + threadIdx.z;
-
-    if ((i >= dimX) || (j >= dimY) || (k >= dimZ)) return;
-
-    const int index = (dimX*dimY)*k + j*dimX+i;
-
-    if (index < num_total) {
-        V1[index] = 2.0f*V1[index] - V1_old[index];
-        V2[index] = 2.0f*V2[index] - V2_old[index];
-        V3[index] = 2.0f*V3[index] - V3_old[index];
-    }
-}
-
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-/************************ MAIN HOST FUNCTION ***********************/
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ)
-{
-    /* Host driver of the primal-dual TGV-L2 denoising on the GPU (device 0).
-     *
-     * U0             host input image/volume with dimX*dimY*dimZ floats
-     * U              host output buffer of the same size
-     * lambda         regularisation parameter
-     * alpha1/alpha0  weights of the first-/second-order terms
-     * iterationsNumb number of primal-dual iterations
-     * L2             constant used for the step sizes: tau = sigma = L2^(-1/2)
-     * dimZ == 1      selects the 2D kernels, anything else the 3D kernels
-     *
-     * Returns 0. Every CUDA runtime call is wrapped in the project's CHECK
-     * macro (its failure behaviour is defined in shared.h, not shown here).
-     */
-    int dimTotal, dev = 0;
-    CHECK(cudaSetDevice(dev));
-
-    dimTotal = dimX*dimY*dimZ;
-    const size_t volBytes = dimTotal*sizeof(float);
-
-    float *U_old, *d_U0, *d_U, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma;
-    tau = pow(L2,-0.5);
-    sigma = pow(L2,-0.5);
-
-    CHECK(cudaMalloc((void**)&d_U0,volBytes));
-    CHECK(cudaMalloc((void**)&d_U,volBytes));
-    CHECK(cudaMalloc((void**)&U_old,volBytes));
-    CHECK(cudaMalloc((void**)&P1,volBytes));
-    CHECK(cudaMalloc((void**)&P2,volBytes));
-
-    CHECK(cudaMalloc((void**)&Q1,volBytes));
-    CHECK(cudaMalloc((void**)&Q2,volBytes));
-    CHECK(cudaMalloc((void**)&Q3,volBytes));
-    CHECK(cudaMalloc((void**)&V1,volBytes));
-    CHECK(cudaMalloc((void**)&V2,volBytes));
-    CHECK(cudaMalloc((void**)&V1_old,volBytes));
-    CHECK(cudaMalloc((void**)&V2_old,volBytes));
-
-    /* cudaMalloc does not clear memory, and the kernels accumulate into
-     * P, Q and V with "+=" (and read V before it is ever written), so these
-     * buffers must start from zero. U_old and V*_old are fully written by the
-     * copy kernels before they are read and need no initialisation. */
-    CHECK(cudaMemset(P1, 0, volBytes));
-    CHECK(cudaMemset(P2, 0, volBytes));
-    CHECK(cudaMemset(Q1, 0, volBytes));
-    CHECK(cudaMemset(Q2, 0, volBytes));
-    CHECK(cudaMemset(Q3, 0, volBytes));
-    CHECK(cudaMemset(V1, 0, volBytes));
-    CHECK(cudaMemset(V2, 0, volBytes));
-
-    /* both the fixed input and the running solution start from U0 */
-    CHECK(cudaMemcpy(d_U0,U0,volBytes,cudaMemcpyHostToDevice));
-    CHECK(cudaMemcpy(d_U,U0,volBytes,cudaMemcpyHostToDevice));
-
-    if (dimZ == 1) {
-        /*2D case */
-        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
-        dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
-
-        for(int n=0; n < iterationsNumb; n++) {
-
-            /* Calculate Dual Variable P */
-            DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, dimX, dimY, sigma);
-            CHECK(cudaDeviceSynchronize());
-            /*Projection onto convex set for P*/
-            ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, alpha1);
-            CHECK(cudaDeviceSynchronize());
-            /* Calculate Dual Variable Q */
-            DualQ_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, Q1, Q2, Q3, dimX, dimY, sigma);
-            CHECK(cudaDeviceSynchronize());
-            /*Projection onto convex set for Q*/
-            ProjQ_2D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, dimX, dimY, alpha0);
-            CHECK(cudaDeviceSynchronize());
-            /*saving U into U_old*/
-            copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal);
-            CHECK(cudaDeviceSynchronize());
-            /*adjoint operation  -> divergence and projection of P*/
-            DivProjP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, dimX, dimY, lambda, tau);
-            CHECK(cudaDeviceSynchronize());
-            /*get updated solution U*/
-            newU_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal);
-            CHECK(cudaDeviceSynchronize());
-            /*saving V into V_old*/
-            copyIm_TGV_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal);
-            CHECK(cudaDeviceSynchronize());
-            /* upd V*/
-            UpdV_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, tau);
-            CHECK(cudaDeviceSynchronize());
-            /*get new V*/
-            newU_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal);
-            CHECK(cudaDeviceSynchronize());
-        }
-    }
-    else {
-        /*3D case */
-        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
-        /* the z extent of the grid must be derived from the z block size
-         * (the original used BLKXSIZE, which only worked because both are 8) */
-        dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
-
-        float *P3, *Q4, *Q5, *Q6, *V3, *V3_old;
-
-        CHECK(cudaMalloc((void**)&P3,volBytes));
-        CHECK(cudaMalloc((void**)&Q4,volBytes));
-        CHECK(cudaMalloc((void**)&Q5,volBytes));
-        CHECK(cudaMalloc((void**)&Q6,volBytes));
-        CHECK(cudaMalloc((void**)&V3,volBytes));
-        CHECK(cudaMalloc((void**)&V3_old,volBytes));
-
-        /* same zero start as for the 2D buffers above */
-        CHECK(cudaMemset(P3, 0, volBytes));
-        CHECK(cudaMemset(Q4, 0, volBytes));
-        CHECK(cudaMemset(Q5, 0, volBytes));
-        CHECK(cudaMemset(Q6, 0, volBytes));
-        CHECK(cudaMemset(V3, 0, volBytes));
-
-        for(int n=0; n < iterationsNumb; n++) {
-
-            /* Calculate Dual Variable P */
-            DualP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, V3, P1, P2, P3, dimX, dimY, dimZ, sigma);
-            CHECK(cudaDeviceSynchronize());
-            /*Projection onto convex set for P*/
-            ProjP_3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, alpha1);
-            CHECK(cudaDeviceSynchronize());
-            /* Calculate Dual Variable Q */
-            DualQ_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, sigma);
-            CHECK(cudaDeviceSynchronize());
-            /*Projection onto convex set for Q*/
-            ProjQ_3D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, alpha0);
-            CHECK(cudaDeviceSynchronize());
-            /*saving U into U_old*/
-            copyIm_TGV_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal);
-            CHECK(cudaDeviceSynchronize());
-            /*adjoint operation  -> divergence and projection of P*/
-            DivProjP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, P3, dimX, dimY, dimZ, lambda, tau);
-            CHECK(cudaDeviceSynchronize());
-            /*get updated solution U*/
-            newU_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal);
-            CHECK(cudaDeviceSynchronize());
-            /*saving V into V_old*/
-            copyIm_TGV_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal);
-            CHECK(cudaDeviceSynchronize());
-            /* upd V*/
-            UpdV_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, tau);
-            CHECK(cudaDeviceSynchronize());
-            /*get new V*/
-            newU_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal);
-            CHECK(cudaDeviceSynchronize());
-        }
-
-        CHECK(cudaFree(Q4));
-        CHECK(cudaFree(Q5));
-        CHECK(cudaFree(Q6));
-        CHECK(cudaFree(P3));
-        CHECK(cudaFree(V3));
-        CHECK(cudaFree(V3_old));
-    }
-
-    /* hand the result back to the caller and release the shared buffers */
-    CHECK(cudaMemcpy(U,d_U,volBytes,cudaMemcpyDeviceToHost));
-    CHECK(cudaFree(d_U0));
-    CHECK(cudaFree(d_U));
-    CHECK(cudaFree(U_old));
-    CHECK(cudaFree(P1));
-    CHECK(cudaFree(P2));
-
-    CHECK(cudaFree(Q1));
-    CHECK(cudaFree(Q2));
-    CHECK(cudaFree(Q3));
-    CHECK(cudaFree(V1));
-    CHECK(cudaFree(V2));
-    CHECK(cudaFree(V1_old));
-    CHECK(cudaFree(V2_old));
-    return 0;
-}
diff --git a/Core/regularisers_GPU/TGV_GPU_core.h b/Core/regularisers_GPU/TGV_GPU_core.h
deleted file mode 100644
index 9f73d1c..0000000
--- a/Core/regularisers_GPU/TGV_GPU_core.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Public interface of the GPU TGV-L2 denoising routine (C linkage). */
-#ifndef __TGV_GPU_H__
-#define __TGV_GPU_H__
-#include "CCPiDefines.h"
-#include <stdio.h>
-
-/* Host entry point of the primal-dual TGV-L2 denoising.
- *
- * U0             host input image/volume (dimX*dimY*dimZ floats)
- * U              host output buffer of the same size
- * lambda         regularisation parameter
- * alpha1/alpha0  parameters controlling the first-/second-order terms
- * iterationsNumb number of primal-dual iterations
- * L2             Lipschitz constant (12 by default according to the implementation notes)
- * dimX,dimY,dimZ dimensions; dimZ == 1 selects the 2D code path
- *
- * The implementation returns 0.
- */
-extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
-
-#endif 
diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu
deleted file mode 100755
index b371c5d..0000000
--- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu
+++ /dev/null
@@ -1,564 +0,0 @@
- /*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/ 
-
-#include "TV_FGP_GPU_core.h"
-#include "shared.h"
-#include <thrust/device_vector.h>
-#include <thrust/transform_reduce.h>
-
-/* CUDA implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
- *
- * Input Parameters:
- * 1. Noisy image/volume 
- * 2. lambdaPar - regularization parameter 
- * 3. Number of iterations
- * 4. epsilon: tolerance constant
- * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
- * 6. nonneg: nonnegativity constraint (0 is OFF by default)
- * 7. print information: 0 (off) or 1 (on) 
- *
- * Output:
- * [1] Filtered/regularized image
- *
- * This function is based on the Matlab's code and paper by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- */
-
-
-/* NOTE(review): presumably the thread-block extents of the 2D kernel launches;
- * the launch code is outside this section -- confirm against the host function */
-#define BLKXSIZE2D 16
-#define BLKYSIZE2D 16
-
-/* NOTE(review): presumably the thread-block extents of the 3D kernel launches -- confirm */
-#define BLKXSIZE 8
-#define BLKYSIZE 8
-#define BLKZSIZE 8
-
-/* Integer division rounded up: number of blocks of size b needed to cover a elements */
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-/* Unary functor returning the square of its argument; callable on host and device. */
-struct square
-{
-    __host__ __device__ float operator()(float x)
-    {
-        return x * x;
-    }
-};
-
-/************************************************/
-/*****************2D modules*********************/
-/************************************************/
-__global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda)
-{
-    /* Per pixel of an N x M image:
-     *   D = Ad - lambda*(R1 + R2 - R1[x-1] - R2[y-1]),
-     * where the "previous" term is taken as zero in the first column (R1)
-     * and first row (R2). ImSize is not used. */
-    const int col = blockIdx.x*blockDim.x+threadIdx.x;
-    const int row = blockIdx.y*blockDim.y+threadIdx.y;
-
-    /* threads outside the image have nothing to do */
-    if ((col >= N) || (row >= M)) return;
-
-    const int here = col + N*row;
-    const float prevR1 = (col <= 0) ? 0.0f : R1[(col-1) + N*row];
-    const float prevR2 = (row <= 0) ? 0.0f : R2[col + N*(row-1)];
-
-    D[here] = Ad[here] - lambda*(R1[here] + R2[here] - prevR1 - prevR2);
-}
-
-__global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, int N, int M, int ImSize, float multip)
-{
-    /* Per pixel of an N x M image:
-     *   P1 = R1 + multip*(D - D[x+1]),  P2 = R2 + multip*(D - D[y+1]),
-     * where the difference is taken as zero in the last column (P1) and
-     * last row (P2). ImSize is not used. */
-    const int col = blockIdx.x*blockDim.x+threadIdx.x;
-    const int row = blockIdx.y*blockDim.y+threadIdx.y;
-
-    if ((col >= N) || (row >= M)) return;
-
-    const int here = col + N*row;
-    const float diffX = (col >= N-1) ? 0.0f : (D[here] - D[(col+1) + N*row]);
-    const float diffY = (row >= M-1) ? 0.0f : (D[here] - D[col + N*(row + 1)]);
-
-    P1[here] = R1[here] + multip*diffX;
-    P2[here] = R2[here] + multip*diffY;
-}
-
-__global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize)
-{
-    /* Isotropic projection, per pixel of an N x M image: when
-     * P1^2 + P2^2 exceeds 1, both components are divided by the square root
-     * of that sum. ImSize is not used. */
-    const int col = blockIdx.x*blockDim.x+threadIdx.x;
-    const int row = blockIdx.y*blockDim.y+threadIdx.y;
-
-    if ((col >= N) || (row >= M)) return;
-
-    const int here = col + N*row;
-    const float p1 = P1[here];
-    const float p2 = P2[here];
-    const float sumSq = pow(p1,2) +  pow(p2,2);
-
-    if (sumSq > 1.0f) {
-        P1[here] = p1/sqrt(sumSq);
-        P2[here] = p2/sqrt(sumSq);
-    }
-}
-__global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize)
-{
-    /* Anisotropic projection, per pixel of an N x M image: each component is
-     * divided by max(|component|, 1), independently of the other.
-     * ImSize is not used. */
-    const int col = blockIdx.x*blockDim.x+threadIdx.x;
-    const int row = blockIdx.y*blockDim.y+threadIdx.y;
-
-    if ((col >= N) || (row >= M)) return;
-
-    const int here = col + N*row;
-    const float p1 = P1[here];
-    const float p2 = P2[here];
-
-    const float mag1 = abs(p1);
-    const float mag2 = abs(p2);
-    const float div1 = (mag1 < 1.0f) ? 1.0f : mag1;
-    const float div2 = (mag2 < 1.0f) ? 1.0f : mag2;
-
-    P1[here] = p1/div1;
-    P2[here] = p2/div2;
-}
-__global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize)
-{
-    /* Per pixel of an N x M image: R = P + multip2*(P - P_old), for both
-     * components. tkp1, tk and ImSize are accepted but not used here. */
-    const int col = blockIdx.x*blockDim.x+threadIdx.x;
-    const int row = blockIdx.y*blockDim.y+threadIdx.y;
-
-    if ((col >= N) || (row >= M)) return;
-
-    const int here = col + N*row;
-    const float p1 = P1[here];
-    const float p2 = P2[here];
-
-    R1[here] = p1 + multip2*(p1 - P1_old[here]);
-    R2[here] = p2 + multip2*(p2 - P2_old[here]);
-}
-__global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total)
-{
-    /* Clamps negative entries of Output to zero. The element handled by a
-     * thread is xIndex + N*yIndex; only entries with a linear index below
-     * num_total are touched. M is not used. */
-    const int col = blockDim.x * blockIdx.x + threadIdx.x;
-    const int row = blockDim.y * blockIdx.y + threadIdx.y;
-
-    const int here = col + N*row;
-
-    if (here >= num_total) return;
-
-    if (Output[here] < 0.0f) {
-        Output[here] = 0.0f;
-    }
-}
-/************************************************/
-/*****************3D modules*********************/
-/************************************************/
-__global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda)
-{
-    
-    float val1,val2,val3;
-    
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k < Z)) {      
-        if (i <= 0) {val1 = 0.0f;} else {val1 = R1[(N*M)*(k) + (i-1) + N*j];}
-        if (j <= 0) {val2 = 0.0f;} else {val2 = R2[(N*M)*(k) + i + N*(j-1)];}
-        if (k <= 0) {val3 = 0.0f;} else {val3 = R3[(N*M)*(k-1) + i + N*j];}
-        //Write final result to global memory
-        D[index] = Ad[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3);
-    }
-    return;
-}
-
-__global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float multip)
-{
-    
-    float val1,val2,val3;
-    
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k <  Z)) {       
-        /* boundary conditions */
-        if (i >= N-1) val1 = 0.0f; else val1 = D[index] - D[(N*M)*(k) + (i+1) + N*j];
-        if (j >= M-1) val2 = 0.0f; else val2 = D[index] - D[(N*M)*(k) + i + N*(j+1)];
-        if (k >= Z-1) val3 = 0.0f; else val3 = D[index] - D[(N*M)*(k+1) + i + N*j];
-        
-        //Write final result to global memory
-        P1[index] = R1[index] + multip*val1;
-        P2[index] = R2[index] + multip*val2;
-        P3[index] = R3[index] + multip*val3;
-    }
-    return;
-}
-
-__global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
-{
-    
-    float denom,sq_denom;    
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k <  Z)) {
-        denom = pow(P1[index],2) +  pow(P2[index],2) + pow(P3[index],2);
-        
-        if (denom > 1.0f) {
-            sq_denom = 1.0f/sqrt(denom);
-            P1[index] = P1[index]*sq_denom;
-            P2[index] = P2[index]*sq_denom;
-            P3[index] = P3[index]*sq_denom;
-        }
-    }
-    return;
-}
-
-__global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
-{
-    
-    float val1, val2, val3;    
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k <  Z)) {
-                val1 = abs(P1[index]);
-                val2 = abs(P2[index]);
-                val3 = abs(P3[index]);
-                if (val1 < 1.0f) {val1 = 1.0f;}
-                if (val2 < 1.0f) {val2 = 1.0f;}
-                if (val3 < 1.0f) {val3 = 1.0f;}
-                P1[index] = P1[index]/val1;
-                P2[index] = P2[index]/val2;
-                P3[index] = P3[index]/val3;
-    }
-    return;
-}
-__global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize)
-{
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k <  Z)) { 
-        R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]);
-        R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]);
-        R3[index] = P3[index] + multip2*(P3[index] - P3_old[index]);
-    }
-    return;
-}
-
-__global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_total)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if (index < num_total)	{
-        if (Output[index] < 0.0f) Output[index] = 0.0f;
-    }
-}
-__global__ void FGPcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
-{
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
-    
-    if (index < num_total)	{
-        Output[index] = Input[index];
-    }
-}
-
-__global__ void FGPcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
-{
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if (index < num_total)	{
-        Output[index] = Input[index];
-    }
-}
-
-__global__ void FGPResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
-{
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
-    
-    if (index < num_total)	{
-        Output[index] = Input1[index] - Input2[index];
-    }
-}
-
-__global__ void FGPResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
-{
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if (index < num_total)	{
-        Output[index] = Input1[index] - Input2[index];
-    }
-}
-
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-
-////////////MAIN HOST FUNCTION ///////////////
-extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
-{
-    int deviceCount = -1; // number of devices
-    cudaGetDeviceCount(&deviceCount);
-    if (deviceCount == 0) {
-        fprintf(stderr, "No CUDA devices found\n");
-        return -1;
-    }
-    
-    int count = 0, i;
-    float re, multip,multip2;    
-	float tk = 1.0f;
-    float tkp1=1.0f;
-        
-    if (dimZ <= 1) {
-		/*2D verson*/
-		int ImSize = dimX*dimY;    
-		float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
-   
-		dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
-		dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
-    
-		/*allocate space for images on device*/
-		checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
-		if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
-    
-        checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
-        cudaMemset(P1, 0, ImSize*sizeof(float));
-        cudaMemset(P2, 0, ImSize*sizeof(float));
-        cudaMemset(P1_prev, 0, ImSize*sizeof(float));
-        cudaMemset(P2_prev, 0, ImSize*sizeof(float));
-        cudaMemset(R1, 0, ImSize*sizeof(float));
-        cudaMemset(R2, 0, ImSize*sizeof(float));
-
-        /********************** Run CUDA 2D kernel here ********************/    
-        multip = (1.0f/(8.0f*lambdaPar));
-    
-        /* The main kernel */
-        for (i = 0; i < iter; i++) {
-        
-            /* computing the gradient of the objective function */
-            Obj_func2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-            
-            if (nonneg != 0) {
-            nonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() ); }
-                    
-            /*Taking a step towards minus of the gradient*/
-            Grad_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, dimX, dimY, ImSize, multip);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            /* projection step */
-            if (methodTV == 0) Proj_func2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/
-            else Proj_func2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/            
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            multip2 = ((tk-1.0f)/tkp1);
-        
-            Rupd_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            if (epsil != 0.0f) {
-                /* calculate norm - stopping rules using the Thrust library */
-                FGPResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
-                checkCudaErrors( cudaDeviceSynchronize() );
-                checkCudaErrors(cudaPeekAtLastError() );               
-                
-                thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize); 
-                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
-                thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
-                float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
-                    
-                re = (reduction/reduction2);      
-                if (re < epsil)  count++;
-                    if (count > 4) break;       
-             
-                FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
-                checkCudaErrors( cudaDeviceSynchronize() );
-                checkCudaErrors(cudaPeekAtLastError() );                                              
-            }                  
-        
-            FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );       
- 
-            tk = tkp1;
-        }
-        if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i);   
-            /***************************************************************/    
-            //copy result matrix from device to host memory
-            cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost);
-    
-            cudaFree(d_input);
-            cudaFree(d_update);
-            if (epsil != 0.0f) cudaFree(d_update_prev);
-            cudaFree(P1);
-            cudaFree(P2);
-            cudaFree(P1_prev);
-            cudaFree(P2_prev);
-            cudaFree(R1);
-            cudaFree(R2);
-    }
-    else {
-            /*3D verson*/
-            int ImSize = dimX*dimY*dimZ;    
-            float *d_input, *d_update=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL;
-   
-            dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
-            dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
-    
-            /*allocate space for images on device*/
-            checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );            
-            checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) );
-    
-            checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
-            cudaMemset(P1, 0, ImSize*sizeof(float));
-            cudaMemset(P2, 0, ImSize*sizeof(float));
-            cudaMemset(P3, 0, ImSize*sizeof(float));
-            cudaMemset(P1_prev, 0, ImSize*sizeof(float));
-            cudaMemset(P2_prev, 0, ImSize*sizeof(float));
-            cudaMemset(P3_prev, 0, ImSize*sizeof(float));
-            cudaMemset(R1, 0, ImSize*sizeof(float));
-            cudaMemset(R2, 0, ImSize*sizeof(float));
-            cudaMemset(R3, 0, ImSize*sizeof(float));
-            /********************** Run CUDA 3D kernel here ********************/    
-            multip = (1.0f/(26.0f*lambdaPar));
-    
-            /* The main kernel */
-        for (i = 0; i < iter; i++) {
-        
-            /* computing the gradient of the objective function */
-            Obj_func3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            if (nonneg != 0) {
-            nonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() ); }
-            
-            /*Taking a step towards minus of the gradient*/
-            Grad_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, multip);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            /* projection step */
-            if (methodTV == 0) Proj_func3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */
-            else Proj_func3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            multip2 = ((tk-1.0f)/tkp1);
-        
-            Rupd_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );           
-        
-            FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );   
-            
-            FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );      
- 
-            tk = tkp1;
-        }
-        if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i);   
-            /***************************************************************/    
-            //copy result matrix from device to host memory
-            cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost);
-    
-            cudaFree(d_input);
-            cudaFree(d_update);            
-            cudaFree(P1);
-            cudaFree(P2);
-            cudaFree(P3);
-            cudaFree(P1_prev);
-            cudaFree(P2_prev);
-            cudaFree(P3_prev);
-            cudaFree(R1);
-            cudaFree(R2);        
-            cudaFree(R3);        
-    } 
-    //cudaDeviceReset();
-    return 0;
-}
diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.h b/Core/regularisers_GPU/TV_FGP_GPU_core.h
deleted file mode 100755
index bf13508..0000000
--- a/Core/regularisers_GPU/TV_FGP_GPU_core.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _TV_FGP_GPU_
-#define _TV_FGP_GPU_
-
-#include "CCPiDefines.h"
-#include <memory.h>
-
-extern "C" CCPI_EXPORT int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
-
-#endif 
diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/Core/regularisers_GPU/TV_ROF_GPU_core.cu
deleted file mode 100755
index 76f5be9..0000000
--- a/Core/regularisers_GPU/TV_ROF_GPU_core.cu
+++ /dev/null
@@ -1,358 +0,0 @@
- /*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/ 
-
-#include "TV_ROF_GPU_core.h"
-
-/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
-*
-* Input Parameters:
-* 1. Noisy image/volume [REQUIRED]
-* 2. lambda - regularization parameter [REQUIRED]
-* 3. tau - marching step for explicit scheme, ~0.1 is recommended [REQUIRED]
-* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
-*
-* Output:
-* [1] Regularized image/volume
-
- * This function is based on the paper by
-* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
-*
-* D. Kazantsev, 2016-18
-*/
-#include "shared.h"
-    
-#define BLKXSIZE 8
-#define BLKYSIZE 8
-#define BLKZSIZE 8
-    
-#define BLKXSIZE2D 16
-#define BLKYSIZE2D 16
-#define EPS 1.0e-12
-    
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-
-__host__ __device__ int sign (float x)
-{
-        return (x > 0) - (x < 0);
-}        
-   
-/*********************2D case****************************/    
-    
-    /* differences 1 */
-    __global__ void D1_func2D(float* Input, float* D1, int N, int M)      
-    {
-		int i1, j1, i2;
-		float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1;
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        
-        int index = i + N*j;        
-        
-        if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) {
-            
-            /* boundary conditions (Neumann reflections) */
-                i1 = i + 1; if (i1 >= N) i1 = i-1;
-                i2 = i - 1; if (i2 < 0) i2 = i+1;
-                j1 = j + 1; if (j1 >= M) j1 = j-1;
-		
-		     /* Forward-backward differences */
-                NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */
-                NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */                
-                NOMy_0 = Input[index] - Input[j*N + i2]; /* y- */
-                
-                denom1 = NOMx_1*NOMx_1;
-                denom2 = 0.5f*(sign((float)NOMy_1) + sign((float)NOMy_0))*(MIN(abs((float)NOMy_1), abs((float)NOMy_0)));
-                denom2 = denom2*denom2;
-                T1 = sqrt(denom1 + denom2 + EPS);
-                D1[index] = NOMx_1/T1;
-		}		
-	}       
-    
-    /* differences 2 */
-    __global__ void D2_func2D(float* Input, float* D2, int N, int M)      
-    {
-		int i1, j1, j2;
-		float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2;
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        
-        int index = i + N*j;
-        
-        if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) {
-            
-            /* boundary conditions (Neumann reflections) */
-                i1 = i + 1; if (i1 >= N) i1 = i-1;
-                j1 = j + 1; if (j1 >= M) j1 = j-1;
-                j2 = j - 1; if (j2 < 0) j2 = j+1; 
-		
-                /* Forward-backward differences */
-                NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */
-                NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */
-                NOMx_0 = Input[index] - Input[j2*N + i]; /* x- */
-                
-                denom1 = NOMy_1*NOMy_1;
-                denom2 = 0.5f*(sign((float)NOMx_1) + sign((float)NOMx_0))*(MIN(abs((float)NOMx_1), abs((float)NOMx_0)));
-                denom2 = denom2*denom2;
-                T2 = sqrt(denom1 + denom2 + EPS);
-                D2[index] = NOMy_1/T2;
-		}		
-	}
-    
-    __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float lambda, float tau, int N, int M)    
-    {
-		int i2, j2;
-		float dv1,dv2;
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        
-        int index = i + N*j;        
-        
-        if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) {
-            
-				/* boundary conditions (Neumann reflections) */
-                i2 = i - 1; if (i2 < 0) i2 = i+1;
-                j2 = j - 1; if (j2 < 0) j2 = j+1; 
-                
-				/* divergence components  */
-                dv1 = D1[index] - D1[j2*N + i];
-                dv2 = D2[index] - D2[j*N + i2];
-                
-                Update[index] += tau*(2.0f*lambda*(dv1 + dv2) - (Update[index] - Input[index]));      
-		
-		}  
-	}   
-/*********************3D case****************************/    
- 
-    /* differences 1 */
-    __global__ void D1_func3D(float* Input, float* D1, int dimX, int dimY, int dimZ)      
-    {
-		float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
-		int i1,i2,k1,j1,j2,k2;
-		
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        int k = blockDim.z * blockIdx.z + threadIdx.z;
-        
-      	int index = (dimX*dimY)*k + j*dimX+i;     
-        
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-            
-                    /* symmetric boundary conditions (Neuman) */
-                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                    i2 = i - 1; if (i2 < 0) i2 = i+1;
-                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                    j2 = j - 1; if (j2 < 0) j2 = j+1;
-                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                    k2 = k - 1; if (k2 < 0) k2 = k+1;                    
-                    
-                    /* Forward-backward differences */
-                    NOMx_1 = Input[(dimX*dimY)*k + j1*dimX + i] - Input[index]; /* x+ */
-                    NOMy_1 = Input[(dimX*dimY)*k + j*dimX + i1] - Input[index]; /* y+ */                    
-                    NOMy_0 = Input[index] - Input[(dimX*dimY)*k + j*dimX + i2]; /* y- */
-                    
-                    NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
-                    NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
-                    
-                    
-                    denom1 = NOMx_1*NOMx_1;
-                    denom2 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0)));
-                    denom2 = denom2*denom2;
-                    denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0)));
-                    denom3 = denom3*denom3;
-                    T1 = sqrt(denom1 + denom2 + denom3 + EPS);
-                    D1[index] = NOMx_1/T1;	
-		}		
-	}      
-
-    /* differences 2 */
-    __global__ void D2_func3D(float* Input, float* D2, int dimX, int dimY, int dimZ)      
-    {
-		float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
-		int i1,i2,k1,j1,j2,k2;
-		
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        int k = blockDim.z * blockIdx.z + threadIdx.z;
-        
-      	int index = (dimX*dimY)*k + j*dimX+i;     
-        
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-                    /* symmetric boundary conditions (Neuman) */
-                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                    i2 = i - 1; if (i2 < 0) i2 = i+1;
-                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                    j2 = j - 1; if (j2 < 0) j2 = j+1;
-                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                    k2 = k - 1; if (k2 < 0) k2 = k+1;
-                    
-                    
-                    /* Forward-backward differences */
-                    NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
-                    NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
-                    NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
-                    NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
-                    NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
-                    
-                    
-                    denom1 = NOMy_1*NOMy_1;
-                    denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0)));
-                    denom2 = denom2*denom2;
-                    denom3 = 0.5*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0)));
-                    denom3 = denom3*denom3;
-                    T2 = sqrt(denom1 + denom2 + denom3 + EPS);
-                    D2[index] = NOMy_1/T2;
-		}
-	}
-	
-	  /* differences 3 */
-    __global__ void D3_func3D(float* Input, float* D3, int dimX, int dimY, int dimZ)      
-    {
-		float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
-		int i1,i2,k1,j1,j2,k2;
-		
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        int k = blockDim.z * blockIdx.z + threadIdx.z;
-        
-      	int index = (dimX*dimY)*k + j*dimX+i;     
-        
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-
-				i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                i2 = i - 1; if (i2 < 0) i2 = i+1;
-                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                j2 = j - 1; if (j2 < 0) j2 = j+1;
-                k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                k2 = k - 1; if (k2 < 0) k2 = k+1;
-                
-                /* Forward-backward differences */
-                NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
-                NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
-                NOMy_0 = Input[index] - Input[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */
-                NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
-                NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
-               
-                denom1 = NOMz_1*NOMz_1;
-                denom2 = 0.5*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0)));
-                denom2 = denom2*denom2;
-                denom3 = 0.5*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0)));
-                denom3 = denom3*denom3;
-                T3 = sqrt(denom1 + denom2 + denom3 + EPS);
-                D3[index] = NOMz_1/T3;
-		}
-	}
-
-    __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float lambda, float tau, int dimX, int dimY, int dimZ)    
-    {
-		float dv1, dv2, dv3;
-		int i1,i2,k1,j1,j2,k2;
-		int i = blockDim.x * blockIdx.x + threadIdx.x;
-        int j = blockDim.y * blockIdx.y + threadIdx.y;
-        int k = blockDim.z * blockIdx.z + threadIdx.z;
-        
-        int index = (dimX*dimY)*k + j*dimX+i;       
-        
-        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
-            
-					/* symmetric boundary conditions (Neuman) */
-                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
-                    i2 = i - 1; if (i2 < 0) i2 = i+1;
-                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
-                    j2 = j - 1; if (j2 < 0) j2 = j+1;
-                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
-                    k2 = k - 1; if (k2 < 0) k2 = k+1;
-                    
-                    /*divergence components */
-                    dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i];
-                    dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
-                    dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
-                    
-                    Update[index] += tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index]));
-		
-		}  
-	}
-
-/////////////////////////////////////////////////
-// HOST FUNCTION
-extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z)
-{
-	    // set up device
-		int dev = 0;
-		CHECK(cudaSetDevice(dev));
-        float *d_input, *d_update, *d_D1, *d_D2;
-        
-	if (Z == 0) Z = 1;
-        CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float)));
-        CHECK(cudaMalloc((void**)&d_update,N*M*Z*sizeof(float)));
-        CHECK(cudaMalloc((void**)&d_D1,N*M*Z*sizeof(float)));
-        CHECK(cudaMalloc((void**)&d_D2,N*M*Z*sizeof(float)));
-        
-        CHECK(cudaMemcpy(d_input,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
-        CHECK(cudaMemcpy(d_update,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));      
-        
-        if (Z > 1) {
-			// TV - 3D case
-            dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
-            dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE));            
-            
-            float *d_D3;
-            CHECK(cudaMalloc((void**)&d_D3,N*M*Z*sizeof(float)));
-            
-            for(int n=0; n < iter; n++) {
-                /* calculate differences */
-                D1_func3D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M, Z);
-                CHECK(cudaDeviceSynchronize());
-				D2_func3D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M, Z);
-                CHECK(cudaDeviceSynchronize());        
-                D3_func3D<<<dimGrid,dimBlock>>>(d_update, d_D3, N, M, Z);
-                CHECK(cudaDeviceSynchronize());        
-                /*running main kernel*/
-                TV_kernel3D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar, tau, N, M, Z);
-                CHECK(cudaDeviceSynchronize());
-            }
-            
-            CHECK(cudaFree(d_D3));
-        }
-        else {
-	    // TV - 2D case
-            dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
-            dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
-             
-            for(int n=0; n < iter; n++) {
-                /* calculate differences */
-                D1_func2D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M);
-                CHECK(cudaDeviceSynchronize());
-				D2_func2D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M);
-                CHECK(cudaDeviceSynchronize());        
-                /*running main kernel*/
-                TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambdaPar, tau, N, M);
-                CHECK(cudaDeviceSynchronize());
-            }
-        }        
-        CHECK(cudaMemcpy(Output,d_update,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));
-        CHECK(cudaFree(d_input));
-        CHECK(cudaFree(d_update));
-        CHECK(cudaFree(d_D1));
-        CHECK(cudaFree(d_D2));        
-        //cudaDeviceReset();
-        return 0;
-}
diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.h b/Core/regularisers_GPU/TV_ROF_GPU_core.h
deleted file mode 100755
index 3a09296..0000000
--- a/Core/regularisers_GPU/TV_ROF_GPU_core.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __TVGPU_H__
-#define __TVGPU_H__
-#include "CCPiDefines.h"
-#include <stdio.h>
-
-extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
-
-#endif 
diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.cu b/Core/regularisers_GPU/TV_SB_GPU_core.cu
deleted file mode 100755
index 1f494ee..0000000
--- a/Core/regularisers_GPU/TV_SB_GPU_core.cu
+++ /dev/null
@@ -1,552 +0,0 @@
- /*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/ 
-
-#include "TV_SB_GPU_core.h"
-#include "shared.h"
-#include <thrust/device_vector.h>
-#include <thrust/transform_reduce.h>
-
-/* CUDA implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
-*
-* Input Parameters:
-* 1. Noisy image/volume
-* 2. lambda - regularisation parameter
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon - tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-* 6. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL parameter]
-* 7. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
-*
-* Output:
-* 1. Filtered/regularized image
-*
-* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
-*/
-
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-
-#define BLKXSIZE2D 16
-#define BLKYSIZE2D 16
-
-#define BLKXSIZE 8
-#define BLKYSIZE 8
-#define BLKZSIZE 8
-
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-struct square { __host__ __device__ float operator()(float x) { return x * x; } };
-
-/************************************************/
-/*****************2D modules*********************/
-/************************************************/
-__global__ void gauss_seidel2D_kernel(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, float lambda, float mu, float normConst, int N, int M, int ImSize)
-{
-    
-    float sum;
-    int i1,i2,j1,j2;
-     
-    //calculate each thread global index
-    const int i=blockIdx.x*blockDim.x+threadIdx.x;
-    const int j=blockIdx.y*blockDim.y+threadIdx.y;
-    
-    int index = j*N+i;
-    
-    if ((i < N) && (j < M)) {
-        i1 = i+1; if (i1 == N) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        j1 = j+1; if (j1 == M) j1 = j-1;
-        j2 = j-1; if (j2 < 0) j2 = j+1;
-        
-        sum = Dx[j*N+i2] - Dx[index] + Dy[j2*N+i] - Dy[index] - Bx[j*N+i2] + Bx[index] - By[j2*N+i] + By[index];
-        sum += U_prev[j*N+i1] + U_prev[j*N+i2] + U_prev[j1*N+i] + U_prev[j2*N+i];
-        sum *= lambda;
-        sum += mu*A[index];
-        U[index] = normConst*sum; //Write final result to global memory
-    }
-    return;
-}
-__global__ void updDxDy_shrinkAniso2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, float lambda, int N, int M, int ImSize)
-{
-    
-    int i1,j1;
-    float val1, val11, val2, val22, denom_lam;
-    denom_lam = 1.0f/lambda;
-     
-    //calculate each thread global index
-    const int i=blockIdx.x*blockDim.x+threadIdx.x;
-    const int j=blockIdx.y*blockDim.y+threadIdx.y;
-    
-    int index = j*N+i;
-    
-    if ((i < N) && (j < M)) {
-        i1 = i+1; if (i1 == N) i1 = i-1;
-        j1 = j+1; if (j1 == M) j1 = j-1;
-                
-            val1 = (U[j*N+i1] - U[index]) + Bx[index];
-            val2 = (U[j1*N+i] - U[index]) + By[index];
-            
-            val11 = abs(val1) - denom_lam; if (val11 < 0) val11 = 0;
-            val22 = abs(val2) - denom_lam; if (val22 < 0) val22 = 0;
-            
-            if (val1 !=0) Dx[index] = (val1/abs(val1))*val11; else Dx[index] = 0;
-            if (val2 !=0) Dy[index] = (val2/abs(val2))*val22; else Dy[index] = 0;
-    }
-    return;
-}
-
-__global__ void updDxDy_shrinkIso2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, float lambda, int N, int M, int ImSize)
-{
-    
-    int i1,j1;
-    float val1, val11, val2, denom_lam, denom;
-    denom_lam = 1.0f/lambda;
-     
-    //calculate each thread global index
-    const int i=blockIdx.x*blockDim.x+threadIdx.x;
-    const int j=blockIdx.y*blockDim.y+threadIdx.y;
-    
-    int index = j*N+i;
-    
-    if ((i < N) && (j < M)) {
-        i1 = i+1; if (i1 == N) i1 = i-1;
-        j1 = j+1; if (j1 == M) j1 = j-1;
-        
-            val1 = (U[j*N+i1] - U[index]) + Bx[index];
-            val2 = (U[j1*N+i] - U[index]) + By[index];
-            
-            denom = sqrt(val1*val1 + val2*val2);
-            
-            val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
-            
-            if (denom != 0.0f) {
-                Dx[index] = val11*(val1/denom);
-                Dy[index] = val11*(val2/denom);
-            }
-            else {
-                Dx[index] = 0;
-                Dy[index] = 0;
-            }
-    }
-    return;
-}
-
-__global__ void updBxBy2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, int N, int M, int ImSize)
-{    
-    int i1,j1;
-     
-    //calculate each thread global index
-    const int i=blockIdx.x*blockDim.x+threadIdx.x;
-    const int j=blockIdx.y*blockDim.y+threadIdx.y;
-    
-    int index = j*N+i;
-    
-    if ((i < N) && (j < M)) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == N) i1 = i-1;
-            j1 = j+1; if (j1 == M) j1 = j-1;
-            
-            Bx[index] += (U[j*N+i1] - U[index]) - Dx[index];
-            By[index] += (U[j1*N+i] - U[index]) - Dy[index];
-    }
-    return;
-}
-
-
-/************************************************/
-/*****************3D modules*********************/
-/************************************************/
-__global__ void gauss_seidel3D_kernel(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, float mu, float normConst, int N, int M, int Z, int ImSize)
-{
-    
-    float sum,d_val,b_val;
-    int i1,i2,j1,j2,k1,k2;
-     
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k < Z)) {
-        i1 = i+1; if (i1 == N) i1 = i-1;
-        i2 = i-1; if (i2 < 0) i2 = i+1;
-        j1 = j+1; if (j1 == M) j1 = j-1;
-        j2 = j-1; if (j2 < 0) j2 = j+1;
-        k1 = k+1; if (k1 == Z) k1 = k-1;
-        k2 = k-1; if (k2 < 0) k2 = k+1;
-        
-        d_val = Dx[(N*M)*k + j*N+i2] - Dx[index] + Dy[(N*M)*k + j2*N+i] - Dy[index] + Dz[(N*M)*k2 + j*N+i] - Dz[index];
-        b_val = -Bx[(N*M)*k + j*N+i2] + Bx[index] - By[(N*M)*k + j2*N+i] + By[index] - Bz[(N*M)*k2 + j*N+i] + Bz[index];
-        sum = d_val + b_val;
-        sum += U_prev[(N*M)*k + j*N+i1] + U_prev[(N*M)*k + j*N+i2] + U_prev[(N*M)*k + j1*N+i] + U_prev[(N*M)*k + j2*N+i] + U_prev[(N*M)*k1 + j*N+i] + U_prev[(N*M)*k2 + j*N+i];
-        sum *= lambda;
-        sum += mu*A[index];
-        U[index] = normConst*sum;
-    }
-    return;
-}
-__global__ void updDxDy_shrinkAniso3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, int N, int M, int Z, int ImSize)
-{
-    
-    int i1,j1,k1;
-    float val1, val11, val2, val3, val22, val33, denom_lam;
-    denom_lam = 1.0f/lambda;
-     
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k < Z)) {
-        i1 = i+1; if (i1 == N) i1 = i-1;
-        j1 = j+1; if (j1 == M) j1 = j-1;
-        k1 = k+1; if (k1 == Z) k1 = k-1;
-                
-            val1 = (U[(N*M)*k + i1 + N*j] - U[index]) + Bx[index];
-            val2 = (U[(N*M)*k + i + N*j1] - U[index]) + By[index];
-            val3 = (U[(N*M)*k1 + i + N*j] - U[index]) + Bz[index];
-            
-            val11 = abs(val1) - denom_lam; if (val11 < 0.0f) val11 = 0.0f;
-            val22 = abs(val2) - denom_lam; if (val22 < 0.0f) val22 = 0.0f;
-            val33 = abs(val3) - denom_lam; if (val33 < 0.0f) val33 = 0.0f;
-            
-            if (val1 !=0.0f) Dx[index] = (val1/abs(val1))*val11; else Dx[index] = 0.0f;
-            if (val2 !=0.0f) Dy[index] = (val2/abs(val2))*val22; else Dy[index] = 0.0f;
-            if (val3 !=0.0f) Dz[index] = (val3/abs(val3))*val33; else Dz[index] = 0.0f;
-    }
-    return;
-}
-
-__global__ void updDxDy_shrinkIso3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, int N, int M, int Z, int ImSize)
-{
-    
-    int i1,j1,k1;
-    float val1, val11, val2, val3, denom_lam, denom;
-    denom_lam = 1.0f/lambda;
-     
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k < Z)) {
-        i1 = i+1; if (i1 == N) i1 = i-1;
-        j1 = j+1; if (j1 == M) j1 = j-1;
-        k1 = k+1; if (k1 == Z) k1 = k-1;
-        
-            val1 = (U[(N*M)*k + i1 + N*j] - U[index]) + Bx[index];
-            val2 = (U[(N*M)*k + i + N*j1] - U[index]) + By[index];
-            val3 = (U[(N*M)*k1 + i + N*j] - U[index]) + Bz[index];
-            
-            denom = sqrt(val1*val1 + val2*val2 + val3*val3);
-            
-            val11 = (denom - denom_lam); if (val11 < 0.0f) val11 = 0.0f;
-            
-            if (denom != 0.0f) {
-                Dx[index] = val11*(val1/denom);
-                Dy[index] = val11*(val2/denom);
-                Dz[index] = val11*(val3/denom);
-            }
-            else {
-                Dx[index] = 0.0f;
-                Dy[index] = 0.0f;
-                Dz[index] = 0.0f;
-            }
-    }
-    return;
-}
-
-__global__ void updBxBy3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int N, int M, int Z, int ImSize)
-{    
-    int i1,j1,k1;
-     
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k < Z)) {
-            /* symmetric boundary conditions (Neuman) */
-            i1 = i+1; if (i1 == N) i1 = i-1;
-            j1 = j+1; if (j1 == M) j1 = j-1;
-            k1 = k+1; if (k1 == Z) k1 = k-1;
-            
-            Bx[index] += (U[(N*M)*k + i1 + N*j] - U[index]) - Dx[index];
-            By[index] += (U[(N*M)*k + i + N*j1] - U[index]) - Dy[index];
-            Bz[index] += (U[(N*M)*k1 + i + N*j] - U[index]) - Dz[index];
-    }
-    return;
-}
-
-__global__ void SBcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
-{
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
-    
-    if (index < num_total)	{
-        Output[index] = Input[index];
-    }
-}
-
-__global__ void SBcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
-{
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if (index < num_total)	{
-        Output[index] = Input[index];
-    }
-}
-
-__global__ void SBResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
-{
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
-    
-    if (index < num_total)	{
-        Output[index] = Input1[index] - Input2[index];
-    }
-}
-
-__global__ void SBResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
-{
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if (index < num_total)	{
-        Output[index] = Input1[index] - Input2[index];
-    }
-}
-
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-/********************* MAIN HOST FUNCTION ******************/
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
-{
-    int deviceCount = -1; // number of devices
-    cudaGetDeviceCount(&deviceCount);
-    if (deviceCount == 0) {
-        fprintf(stderr, "No CUDA devices found\n");
-        return -1;
-    }
-    
-	int ll, DimTotal;
-	float re, lambda, normConst;
-    int count = 0;
-    mu = 1.0f/mu;
-	lambda = 2.0f*mu;
-
-    if (dimZ <= 1) {
-		/*2D verson*/
-		DimTotal = dimX*dimY;
-		normConst = 1.0f/(mu + 4.0f*lambda);
-		float *d_input, *d_update, *d_res, *d_update_prev=NULL, *Dx=NULL, *Dy=NULL, *Bx=NULL, *By=NULL;
-   
-		dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
-		dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
-    
-		/*allocate space for images on device*/
-		checkCudaErrors( cudaMalloc((void**)&d_input,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&d_update,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&d_update_prev,DimTotal*sizeof(float)) );
-		if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_res,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&Dx,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&Dy,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&Bx,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&By,DimTotal*sizeof(float)) );
-    
-        checkCudaErrors( cudaMemcpy(d_input,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
-        checkCudaErrors( cudaMemcpy(d_update,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
-        cudaMemset(Dx, 0, DimTotal*sizeof(float));
-        cudaMemset(Dy, 0, DimTotal*sizeof(float));
-        cudaMemset(Bx, 0, DimTotal*sizeof(float));
-        cudaMemset(By, 0, DimTotal*sizeof(float));
-
-        /********************** Run CUDA 2D kernels here ********************/   
-        /* The main kernel */
-        for (ll = 0; ll < iter; ll++) {
-        
-        /* storing old value */
-        SBcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, DimTotal);
-        checkCudaErrors( cudaDeviceSynchronize() );
-        checkCudaErrors(cudaPeekAtLastError() );  
-
-		 /* perform two GS iterations (normally 2 is enough for the convergence) */
-        gauss_seidel2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Bx, By, lambda, mu, normConst, dimX, dimY, DimTotal);
-        checkCudaErrors( cudaDeviceSynchronize() );
-        checkCudaErrors(cudaPeekAtLastError() ); 
-        SBcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, DimTotal);
-        checkCudaErrors( cudaDeviceSynchronize() );
-        checkCudaErrors(cudaPeekAtLastError() );  
-        /* 2nd GS iteration */
-        gauss_seidel2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Bx, By, lambda, mu, normConst, dimX, dimY, DimTotal);
-        checkCudaErrors( cudaDeviceSynchronize() );
-        checkCudaErrors(cudaPeekAtLastError() ); 
-        
-        /* TV-related step */
-          if (methodTV == 1)  updDxDy_shrinkAniso2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, lambda, dimX, dimY, DimTotal);
-          else updDxDy_shrinkIso2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, lambda, dimX, dimY, DimTotal);
-            
-        /* update for Bregman variables */
-        updBxBy2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, dimX, dimY, DimTotal);
-        checkCudaErrors( cudaDeviceSynchronize() );
-        checkCudaErrors(cudaPeekAtLastError() ); 
-        
-          if (epsil != 0.0f) {
-                /* calculate norm - stopping rules using the Thrust library */
-                SBResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, d_res, dimX, dimY, DimTotal);
-                checkCudaErrors( cudaDeviceSynchronize() );
-                checkCudaErrors(cudaPeekAtLastError() );               
-                
-                thrust::device_vector<float> d_vec(d_res, d_res + DimTotal);
-                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));		
-                thrust::device_vector<float> d_vec2(d_update, d_update + DimTotal);  		
-                float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
-                    
-                re = (reduction/reduction2);      
-                if (re < epsil)  count++;
-                    if (count > 4) break;
-          }
-        
-        }
-        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);   
-            /***************************************************************/    
-            //copy result matrix from device to host memory
-            cudaMemcpy(Output,d_update,DimTotal*sizeof(float),cudaMemcpyDeviceToHost);
-    
-            cudaFree(d_input);
-            cudaFree(d_update);
-            cudaFree(d_update_prev);
-            if (epsil != 0.0f) cudaFree(d_res);
-            cudaFree(Dx);
-            cudaFree(Dy);
-            cudaFree(Bx);
-            cudaFree(By);
-    }
-    else {
-		/*3D verson*/
-		DimTotal = dimX*dimY*dimZ;
-		normConst = 1.0f/(mu + 6.0f*lambda);
-		float *d_input, *d_update, *d_res, *d_update_prev=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL;
-   
-        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
-        dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
-    
-		/*allocate space for images on device*/
-		checkCudaErrors( cudaMalloc((void**)&d_input,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&d_update,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&d_update_prev,DimTotal*sizeof(float)) );
-		if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_res,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&Dx,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&Dy,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&Dz,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&Bx,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&By,DimTotal*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&Bz,DimTotal*sizeof(float)) );
-    
-        checkCudaErrors( cudaMemcpy(d_input,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
-        checkCudaErrors( cudaMemcpy(d_update,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
-        cudaMemset(Dx, 0, DimTotal*sizeof(float));
-        cudaMemset(Dy, 0, DimTotal*sizeof(float));
-        cudaMemset(Dz, 0, DimTotal*sizeof(float));
-        cudaMemset(Bx, 0, DimTotal*sizeof(float));
-        cudaMemset(By, 0, DimTotal*sizeof(float));
-        cudaMemset(Bz, 0, DimTotal*sizeof(float));
-
-        /********************** Run CUDA 3D kernels here ********************/   
-        /* The main kernel */
-        for (ll = 0; ll < iter; ll++) {
-        
-        /* storing old value */
-        SBcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, DimTotal);
-        checkCudaErrors( cudaDeviceSynchronize() );
-        checkCudaErrors(cudaPeekAtLastError() );
-
-		 /* perform two GS iterations (normally 2 is enough for the convergence) */
-        gauss_seidel3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Dz, Bx, By, Bz, lambda, mu, normConst, dimX, dimY, dimZ, DimTotal);
-        checkCudaErrors( cudaDeviceSynchronize() );
-        checkCudaErrors(cudaPeekAtLastError() ); 
-        SBcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, DimTotal);
-        checkCudaErrors( cudaDeviceSynchronize() );
-        checkCudaErrors(cudaPeekAtLastError() );  
-        /* 2nd GS iteration */
-        gauss_seidel3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Dz, Bx, By, Bz, lambda, mu, normConst, dimX, dimY, dimZ, DimTotal);
-        checkCudaErrors( cudaDeviceSynchronize() );
-        checkCudaErrors(cudaPeekAtLastError() ); 
-        
-        /* TV-related step */
-          if (methodTV == 1)  updDxDy_shrinkAniso3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, lambda, dimX, dimY, dimZ, DimTotal);
-          else updDxDy_shrinkIso3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, lambda, dimX, dimY, dimZ, DimTotal);
-            
-        /* update for Bregman variables */
-        updBxBy3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, DimTotal);
-        checkCudaErrors( cudaDeviceSynchronize() );
-        checkCudaErrors(cudaPeekAtLastError() ); 
-        
-          if (epsil != 0.0f) {
-                /* calculate norm - stopping rules using the Thrust library */
-                SBResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, d_res, dimX, dimY, dimZ, DimTotal);
-                checkCudaErrors( cudaDeviceSynchronize() );
-                checkCudaErrors(cudaPeekAtLastError() );               
-                
-                thrust::device_vector<float> d_vec(d_res, d_res + DimTotal);
-                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));		
-                thrust::device_vector<float> d_vec2(d_update, d_update + DimTotal);  		
-                float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
-                    
-                re = (reduction/reduction2);
-                if (re < epsil)  count++;
-                    if (count > 4) break;
-          }
-        }
-        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);   
-            /***************************************************************/    
-            //copy result matrix from device to host memory
-            cudaMemcpy(Output,d_update,DimTotal*sizeof(float),cudaMemcpyDeviceToHost);
-    
-            cudaFree(d_input);
-            cudaFree(d_update);
-            cudaFree(d_update_prev);
-            if (epsil != 0.0f) cudaFree(d_res);
-            cudaFree(Dx);
-            cudaFree(Dy);
-            cudaFree(Dz);
-            cudaFree(Bx);
-            cudaFree(By);
-            cudaFree(Bz);
-    } 
-    //cudaDeviceReset();
-    return 0;
-}
diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.h b/Core/regularisers_GPU/TV_SB_GPU_core.h
deleted file mode 100755
index 901b90f..0000000
--- a/Core/regularisers_GPU/TV_SB_GPU_core.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _SB_TV_GPU_
-#define _SB_TV_GPU_
-
-#include "CCPiDefines.h"
-#include <memory.h>
-
-
-extern "C" CCPI_EXPORT int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
-
-#endif 
diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
deleted file mode 100644
index 7503ec7..0000000
--- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
+++ /dev/null
@@ -1,741 +0,0 @@
- /*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/ 
-#include "shared.h"
-#include "dTV_FGP_GPU_core.h"
-#include <thrust/device_vector.h>
-#include <thrust/transform_reduce.h>
-
-/* CUDA implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
- * which employs structural similarity of the level sets of two images/volumes, see [1,2]
- * The current implementation updates image 1 while image 2 is being fixed.
- *
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
- * 3. lambdaPar - regularization parameter [REQUIRED]
- * 4. Number of iterations [OPTIONAL]
- * 5. eplsilon: tolerance constant [OPTIONAL]
- * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL] * 
- * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
- * 8. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL]
- * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
- *
- * Output:
- * [1] Filtered/regularized image/volume
- *
- * This function is based on the Matlab's codes and papers by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
- */
- 
-
-#define BLKXSIZE2D 16
-#define BLKYSIZE2D 16
-
-#define BLKXSIZE 8
-#define BLKYSIZE 8
-#define BLKZSIZE 8
-
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-struct square { __host__ __device__ float operator()(float x) { return x * x; } };
-
-/************************************************/
-/*****************2D modules*********************/
-/************************************************/
-
-__global__ void GradNorm_func2D_kernel(float *Refd, float *Refd_x, float *Refd_y, float eta, int N, int M, int ImSize)
-{
-    
-    float val1, val2, gradX, gradY, magn;
-    //calculate each thread global index
-    const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
-    const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
-    
-    int index = xIndex + N*yIndex; 
-    
-    if ((xIndex < N) && (yIndex < M)) {        
-        /* boundary conditions */
-        if (xIndex >= N-1) val1 = 0.0f; else val1 =  Refd[(xIndex+1) + N*yIndex];
-        if (yIndex >= M-1) val2 = 0.0f; else val2 =  Refd[(xIndex) + N*(yIndex + 1)];        
-        
-            gradX = val1 - Refd[index];
-            gradY = val2 - Refd[index];
-            magn = pow(gradX,2) + pow(gradY,2);
-            magn = sqrt(magn + pow(eta,2));
-            Refd_x[index] = gradX/magn;
-            Refd_y[index] = gradY/magn;         
-    }
-    return;
-}
-
-__global__ void ProjectVect_func2D_kernel(float *R1, float *R2, float *Refd_x, float *Refd_y, int N, int M, int ImSize)
-{
-    
-    float in_prod;
-    //calculate each thread global index
-    const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
-    const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
-    
-    int index = xIndex + N*yIndex; 
-    
-    if ((xIndex < N) && (yIndex < M)) {
-        in_prod = R1[index]*Refd_x[index] + R2[index]*Refd_y[index];   /* calculate inner product */
-        R1[index] = R1[index] - in_prod*Refd_x[index];
-        R2[index] = R2[index] - in_prod*Refd_y[index];       
-    }
-    return;
-}
-
-
-__global__ void Obj_dfunc2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda)
-{
-    
-    float val1,val2;
-    
-    //calculate each thread global index
-    const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
-    const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
-    
-    int index = xIndex + N*yIndex; 
-    
-    if ((xIndex < N) && (yIndex < M)) {        
-        if (xIndex <= 0) {val1 = 0.0f;} else {val1 = R1[(xIndex-1) + N*yIndex];}
-        if (yIndex <= 0) {val2 = 0.0f;} else {val2 = R2[xIndex + N*(yIndex-1)];}
-        
-        //Write final result to global memory
-        D[index] = Ad[index] - lambda*(R1[index] + R2[index] - val1 - val2);
-    }
-    return;
-}
-
-__global__ void Grad_dfunc2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2,  float *Refd_x, float *Refd_y, int N, int M, int ImSize, float multip)
-{
-    
-    float val1,val2,in_prod;
-    
-    //calculate each thread global index
-    const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
-    const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
-    
-    if ((xIndex < N) && (yIndex < M)) {        
-        
-        /* boundary conditions */
-        if (xIndex >= N-1) val1 = 0.0f; else val1 = D[index] - D[(xIndex+1) + N*yIndex];
-        if (yIndex >= M-1) val2 = 0.0f; else val2 = D[index] - D[(xIndex) + N*(yIndex + 1)];
-        
-        in_prod = val1*Refd_x[index] + val2*Refd_y[index];   /* calculate inner product */
-        val1 = val1 - in_prod*Refd_x[index];
-        val2 = val2 - in_prod*Refd_y[index];   
-        
-        //Write final result to global memory
-        P1[index] = R1[index] + multip*val1;
-        P2[index] = R2[index] + multip*val2;
-    }
-    return;
-}
-
-__global__ void Proj_dfunc2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize)
-{
-    
-    float denom;    
-    //calculate each thread global index
-    const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
-    const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
-    
-    if ((xIndex < N) && (yIndex < M)) { 
-        denom = pow(P1[index],2) +  pow(P2[index],2);        
-        if (denom > 1.0f) {
-            P1[index] = P1[index]/sqrt(denom);
-            P2[index] = P2[index]/sqrt(denom);
-        }
-    }
-    return;
-}
-__global__ void Proj_dfunc2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize)
-{
-    
-    float val1, val2;    
-    //calculate each thread global index
-    const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
-    const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
-    
-    if ((xIndex < N) && (yIndex < M)) { 
-                val1 = abs(P1[index]);
-                val2 = abs(P2[index]);
-                if (val1 < 1.0f) {val1 = 1.0f;}
-                if (val2 < 1.0f) {val2 = 1.0f;}
-                P1[index] = P1[index]/val1;
-                P2[index] = P2[index]/val2;
-    }
-    return;
-}
-__global__ void Rupd_dfunc2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize)
-{
-    //calculate each thread global index
-    const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
-    const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
-    
-    if ((xIndex < N) && (yIndex < M)) { 
-        R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]);
-        R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]);
-    }
-    return;
-}
-__global__ void dTVnonneg2D_kernel(float* Output, int N, int M, int num_total)
-{
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
-    
-    if (index < num_total)	{
-        if (Output[index] < 0.0f) Output[index] = 0.0f;
-    }
-}
-__global__ void dTVcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
-{
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
-    
-    if (index < num_total)	{
-        Output[index] = Input[index];
-    }
-}
-
-__global__ void dTVcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
-{
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if (index < num_total)	{
-        Output[index] = Input[index];
-    }
-}
-
-__global__ void dTVResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
-{
-    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
-    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
-    
-    int index = xIndex + N*yIndex;
-    
-    if (index < num_total)	{
-        Output[index] = Input1[index] - Input2[index];
-    }
-}
-
-__global__ void dTVResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
-{
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if (index < num_total)	{
-        Output[index] = Input1[index] - Input2[index];
-    }
-}
-
-/************************************************/
-/*****************3D modules*********************/
-/************************************************/
-__global__ void GradNorm_func3D_kernel(float *Refd, float *Refd_x, float *Refd_y, float *Refd_z, float eta, int N, int M, int Z, int ImSize)
-{
-    
-    float val1, val2, val3, gradX, gradY, gradZ, magn;
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k < Z)) {  
-        /* boundary conditions */
-        if (i >= N-1) val1 = 0.0f; else val1 =  Refd[(N*M)*k + (i+1) + N*j];
-        if (j >= M-1) val2 = 0.0f; else val2 =  Refd[(N*M)*k + i + N*(j+1)];
-        if (k >= Z-1) val3 = 0.0f; else val3 =  Refd[(N*M)*(k+1) + i + N*j];
-        
-            gradX = val1 - Refd[index];
-            gradY = val2 - Refd[index];
-            gradZ = val3 - Refd[index];
-            magn = pow(gradX,2) + pow(gradY,2) + pow(gradZ,2);
-            magn = sqrt(magn + pow(eta,2));
-            Refd_x[index] = gradX/magn;
-            Refd_y[index] = gradY/magn;
-            Refd_z[index] = gradZ/magn;
-    }
-    return;
-}
-
-__global__ void ProjectVect_func3D_kernel(float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize)
-{
-    
-    float in_prod;
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k < Z)) {
-        in_prod = R1[index]*Refd_x[index] + R2[index]*Refd_y[index] + R3[index]*Refd_z[index]; /* calculate inner product */
-        
-        R1[index] = R1[index] - in_prod*Refd_x[index];
-        R2[index] = R2[index] - in_prod*Refd_y[index];
-        R3[index] = R3[index] - in_prod*Refd_z[index];
-    }
-    return;
-}
-
-
-__global__ void Obj_dfunc3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda)
-{
-    
-    float val1,val2,val3;
-    
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k < Z)) {
-        if (i <= 0) {val1 = 0.0f;} else {val1 = R1[(N*M)*(k) + (i-1) + N*j];}
-        if (j <= 0) {val2 = 0.0f;} else {val2 = R2[(N*M)*(k) + i + N*(j-1)];}
-        if (k <= 0) {val3 = 0.0f;} else {val3 = R3[(N*M)*(k-1) + i + N*j];}
-        //Write final result to global memory
-        D[index] = Ad[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3);
-    }
-    return;
-}
-
-__global__ void Grad_dfunc3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize, float multip)
-{
-    
-    float val1,val2,val3,in_prod;
-    
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k <  Z)) {
-        /* boundary conditions */
-        if (i >= N-1) val1 = 0.0f; else val1 = D[index] - D[(N*M)*(k) + (i+1) + N*j];
-        if (j >= M-1) val2 = 0.0f; else val2 = D[index] - D[(N*M)*(k) + i + N*(j+1)];
-        if (k >= Z-1) val3 = 0.0f; else val3 = D[index] - D[(N*M)*(k+1) + i + N*j];       
-        
-        in_prod = val1*Refd_x[index] + val2*Refd_y[index] + val3*Refd_z[index];   /* calculate inner product */
-        val1 = val1 - in_prod*Refd_x[index];
-        val2 = val2 - in_prod*Refd_y[index];
-        val3 = val3 - in_prod*Refd_z[index];
-        
-        //Write final result to global memory
-        P1[index] = R1[index] + multip*val1;
-        P2[index] = R2[index] + multip*val2;
-        P3[index] = R3[index] + multip*val3;
-    }
-    return;
-}
-
-__global__ void Proj_dfunc3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
-{
-    
-    float denom,sq_denom;    
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k <  Z)) {
-        denom = pow(P1[index],2) +  pow(P2[index],2) + pow(P3[index],2);
-        
-        if (denom > 1.0f) {
-            sq_denom = 1.0f/sqrt(denom);
-            P1[index] = P1[index]*sq_denom;
-            P2[index] = P2[index]*sq_denom;
-            P3[index] = P3[index]*sq_denom;
-        }
-    }
-    return;
-}
-
-__global__ void Proj_dfunc3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
-{
-    
-    float val1, val2, val3;    
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k <  Z)) {
-                val1 = abs(P1[index]);
-                val2 = abs(P2[index]);
-                val3 = abs(P3[index]);
-                if (val1 < 1.0f) {val1 = 1.0f;}
-                if (val2 < 1.0f) {val2 = 1.0f;}
-                if (val3 < 1.0f) {val3 = 1.0f;}
-                P1[index] = P1[index]/val1;
-                P2[index] = P2[index]/val2;
-                P3[index] = P3[index]/val3;
-    }
-    return;
-}
-
-
-__global__ void Rupd_dfunc3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize)
-{
-    //calculate each thread global index
-	int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if ((i < N) && (j < M) && (k <  Z)) { 
-        R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]);
-        R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]);
-        R3[index] = P3[index] + multip2*(P3[index] - P3_old[index]);
-    }
-    return;
-}
-
-__global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_total)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-    int j = blockDim.y * blockIdx.y + threadIdx.y;
-    int k = blockDim.z * blockIdx.z + threadIdx.z;
-    
-    int index = (N*M)*k + i + N*j;
-    
-    if (index < num_total)	{
-        if (Output[index] < 0.0f) Output[index] = 0.0f;
-    }
-}
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-
-////////////MAIN HOST FUNCTION ///////////////
-extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
-{
-    int deviceCount = -1; // number of devices
-    cudaGetDeviceCount(&deviceCount);
-    if (deviceCount == 0) {
-        fprintf(stderr, "No CUDA devices found\n");
-        return -1;
-    }
-    
-    int count = 0, i;
-    float re, multip,multip2;    
-	float tk = 1.0f;
-    float tkp1=1.0f;
-        
-    if (dimZ <= 1) {
-		/*2D verson*/
-		int ImSize = dimX*dimY;    
-		float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *d_InputRef=NULL;
-   
-		dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
-		dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
-    
-		/*allocate space for images on device*/
-		checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
-		if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&d_InputRef,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&InputRef_x,ImSize*sizeof(float)) );
-		checkCudaErrors( cudaMalloc((void**)&InputRef_y,ImSize*sizeof(float)) );
-    
-        checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
-        checkCudaErrors( cudaMemcpy(d_InputRef,InputRef,ImSize*sizeof(float),cudaMemcpyHostToDevice));
-        
-        cudaMemset(P1, 0, ImSize*sizeof(float));
-        cudaMemset(P2, 0, ImSize*sizeof(float));
-        cudaMemset(P1_prev, 0, ImSize*sizeof(float));
-        cudaMemset(P2_prev, 0, ImSize*sizeof(float));
-        cudaMemset(R1, 0, ImSize*sizeof(float));
-        cudaMemset(R2, 0, ImSize*sizeof(float));
-        cudaMemset(InputRef_x, 0, ImSize*sizeof(float));
-        cudaMemset(InputRef_y, 0, ImSize*sizeof(float));
-        
-        /******************** Run CUDA 2D kernel here ********************/
-        multip = (1.0f/(8.0f*lambdaPar));
-        /* calculate gradient vectors for the reference */
-        GradNorm_func2D_kernel<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, eta, dimX, dimY, ImSize);
-        checkCudaErrors( cudaDeviceSynchronize() );
-        checkCudaErrors(cudaPeekAtLastError() );
-    
-        /* The main kernel */
-        for (i = 0; i < iter; i++) {
-        
-            /*projects a 2D vector field R-1,2 onto the orthogonal complement of another 2D vector field InputRef_xy*/         
-            ProjectVect_func2D_kernel<<<dimGrid,dimBlock>>>(R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-            
-            /* computing the gradient of the objective function */
-            Obj_dfunc2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-            
-            if (nonneg != 0) {
-            dTVnonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() ); }
-                    
-            /*Taking a step towards minus of the gradient*/
-            Grad_dfunc2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize, multip);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            /* projection step */
-            if (methodTV == 0) Proj_dfunc2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/
-            else Proj_dfunc2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/            
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            multip2 = ((tk-1.0f)/tkp1);
-        
-            Rupd_dfunc2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            if (epsil != 0.0f) {
-                /* calculate norm - stopping rules using the Thrust library */
-                dTVResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
-                checkCudaErrors( cudaDeviceSynchronize() );
-                checkCudaErrors(cudaPeekAtLastError() );               
-                
-                thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize); 
-                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
-                thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
-                float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
-                    
-                re = (reduction/reduction2);      
-                if (re < epsil)  count++;
-                    if (count > 4) break;       
-             
-                dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
-                checkCudaErrors( cudaDeviceSynchronize() );
-                checkCudaErrors(cudaPeekAtLastError() );                                              
-            }
-        
-            dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );       
- 
-            tk = tkp1;
-        }
-        if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", i);   
-            /***************************************************************/    
-            //copy result matrix from device to host memory
-            cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost);
-    
-            cudaFree(d_input);
-            cudaFree(d_update);
-            if (epsil != 0.0f) cudaFree(d_update_prev);
-            cudaFree(P1);
-            cudaFree(P2);
-            cudaFree(P1_prev);
-            cudaFree(P2_prev);
-            cudaFree(R1);
-            cudaFree(R2);
-            
-            cudaFree(d_InputRef);
-            cudaFree(InputRef_x);
-            cudaFree(InputRef_y);
-    }
-    else {
-            /*3D verson*/
-            int ImSize = dimX*dimY*dimZ;    
-            float *d_input, *d_update=NULL, *d_update_prev, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *InputRef_z=NULL, *d_InputRef=NULL;
-   
-            dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
-            dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
-    
-            /*allocate space for images on device*/
-            checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
-            if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&d_InputRef,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&InputRef_x,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&InputRef_y,ImSize*sizeof(float)) );
-            checkCudaErrors( cudaMalloc((void**)&InputRef_z,ImSize*sizeof(float)) );    
-    
-            checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
-            checkCudaErrors( cudaMemcpy(d_InputRef,InputRef,ImSize*sizeof(float),cudaMemcpyHostToDevice));
-            
-            cudaMemset(P1, 0, ImSize*sizeof(float));
-            cudaMemset(P2, 0, ImSize*sizeof(float));
-            cudaMemset(P3, 0, ImSize*sizeof(float));
-            cudaMemset(P1_prev, 0, ImSize*sizeof(float));
-            cudaMemset(P2_prev, 0, ImSize*sizeof(float));
-            cudaMemset(P3_prev, 0, ImSize*sizeof(float));
-            cudaMemset(R1, 0, ImSize*sizeof(float));
-            cudaMemset(R2, 0, ImSize*sizeof(float));
-            cudaMemset(R3, 0, ImSize*sizeof(float));
-            cudaMemset(InputRef_x, 0, ImSize*sizeof(float));
-            cudaMemset(InputRef_y, 0, ImSize*sizeof(float));
-            cudaMemset(InputRef_z, 0, ImSize*sizeof(float));
-            
-            /********************** Run CUDA 3D kernel here ********************/    
-            multip = (1.0f/(26.0f*lambdaPar));
-            /* calculate gradient vectors for the reference */
-            GradNorm_func3D_kernel<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, InputRef_z, eta, dimX, dimY, dimZ, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-    
-            /* The main kernel */
-        for (i = 0; i < iter; i++) {
-
-			/*projects a 3D vector field R-1,2,3 onto the orthogonal complement of another 3D vector field InputRef_xyz*/
-            ProjectVect_func3D_kernel<<<dimGrid,dimBlock>>>(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            /* computing the gradient of the objective function */
-            Obj_dfunc3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            if (nonneg != 0) {
-            dTVnonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() ); }
-            
-            /*Taking a step towards minus of the gradient*/
-            Grad_dfunc3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize, multip);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            /* projection step */
-            if (methodTV == 0) Proj_dfunc3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */
-            else Proj_dfunc3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            multip2 = ((tk-1.0f)/tkp1);
-        
-            Rupd_dfunc3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-            
-            if (epsil != 0.0f) {
-                /* calculate norm - stopping rules using the Thrust library */
-                dTVResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, dimZ, ImSize);
-                checkCudaErrors( cudaDeviceSynchronize() );
-                checkCudaErrors(cudaPeekAtLastError() );               
-                
-                thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize); 
-                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
-                thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
-                float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
-                    
-                re = (reduction/reduction2);      
-                if (re < epsil)  count++;
-                    if (count > 4) break;       
-             
-                dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, ImSize);
-                checkCudaErrors( cudaDeviceSynchronize() );
-                checkCudaErrors(cudaPeekAtLastError() );
-            }
-        
-            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );
-        
-            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );   
-            
-            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize);
-            checkCudaErrors( cudaDeviceSynchronize() );
-            checkCudaErrors(cudaPeekAtLastError() );      
- 
-            tk = tkp1;
-        }
-        if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", i);   
-            /***************************************************************/    
-            //copy result matrix from device to host memory
-            cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost);
-    
-            cudaFree(d_input);
-            cudaFree(d_update);
-            if (epsil != 0.0f) cudaFree(d_update_prev);
-            cudaFree(P1);
-            cudaFree(P2);
-            cudaFree(P3);
-            cudaFree(P1_prev);
-            cudaFree(P2_prev);
-            cudaFree(P3_prev);
-            cudaFree(R1);
-            cudaFree(R2);
-            cudaFree(R3);
-            cudaFree(InputRef_x);
-            cudaFree(InputRef_y);
-            cudaFree(InputRef_z);
-            cudaFree(d_InputRef);
-    }
-    //cudaDeviceReset();
-    return 0;
-}
diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.h b/Core/regularisers_GPU/dTV_FGP_GPU_core.h
deleted file mode 100644
index f9281e8..0000000
--- a/Core/regularisers_GPU/dTV_FGP_GPU_core.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _dTV_FGP_GPU_
-#define _dTV_FGP_GPU_
-
-#include "CCPiDefines.h"
-#include <memory.h>
-
-extern "C" CCPI_EXPORT int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
-
-#endif 
diff --git a/Core/regularisers_GPU/shared.h b/Core/regularisers_GPU/shared.h
deleted file mode 100644
index fe98cd6..0000000
--- a/Core/regularisers_GPU/shared.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*shared macros*/
-
-
-/*checks CUDA call, should be used in functions returning <int> value
-if error happens, writes to standard error and explicitly returns -1*/
-#define CHECK(call)                                                            \
-{                                                                              \
-    const cudaError_t error = call;                                            \
-    if (error != cudaSuccess)                                                  \
-    {                                                                          \
-        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
-        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
-                cudaGetErrorString(error));                                    \
-        return -1;                                                             \
-    }                                                                          \
-}
-
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(call)                                                            \
-{                                                                              \
-    const cudaError_t error = call;                                            \
-    if (error != cudaSuccess)                                                  \
-    {                                                                          \
-        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
-        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
-                cudaGetErrorString(error));                                    \
-        return -1;                                                                \
-    }                                                                          \
-}
-/*#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
-    if (cudaSuccess != err)
-    {
-        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
-                file, line, (int)err, cudaGetErrorString(err));
-        return;
-    }
-}
-*/
-
diff --git a/Wrappers/CMakeLists.txt b/Wrappers/CMakeLists.txt
deleted file mode 100644
index bdcb8f4..0000000
--- a/Wrappers/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-#   Copyright 2017 Edoardo Pasca
-#
-#   Licensed under the Apache License, Version 2.0 (the "License");
-#   you may not use this file except in compliance with the License.
-#   You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#   Unless required by applicable law or agreed to in writing, software
-#   distributed under the License is distributed on an "AS IS" BASIS,
-#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#   See the License for the specific language governing permissions and
-#   limitations under the License.
-if (BUILD_MATLAB_WRAPPER)
-    add_subdirectory(Matlab)
-endif()
-if (BUILD_PYTHON_WRAPPER)
-    add_subdirectory(Python)
-endif()
\ No newline at end of file
diff --git a/Wrappers/Matlab/CMakeLists.txt b/Wrappers/Matlab/CMakeLists.txt
deleted file mode 100755
index 0c26148..0000000
--- a/Wrappers/Matlab/CMakeLists.txt
+++ /dev/null
@@ -1,147 +0,0 @@
-project(regulariserMatlab)
-
-
-find_package(Matlab REQUIRED COMPONENTS MAIN_PROGRAM MX_LIBRARY ENG_LIBRARY )
-
-
-
-#C:\Users\ofn77899\Documents\Projects\CCPi\GitHub\CCPi-FISTA_Reconstruction\Core\regularisers_CPU
-# matlab_add_mex(
-    # NAME CPU_ROF
-    # SRC 
-      # ${CMAKE_SOURCE_DIR}/Wrappers/Matlab/mex_compile/regularisers_CPU/ROF_TV.c
-    # LINK_TO cilreg ${Matlab_LIBRARIES}
-    # )
-    
-# target_include_directories(CPU_ROF 
-   # PUBLIC ${CMAKE_SOURCE_DIR}/Core/regularisers_CPU
-   # ${CMAKE_SOURCE_DIR}/Core/regularisers_GPU
-   # ${CMAKE_SOURCE_DIR}/Core/inpainters_CPU
-   # ${CMAKE_SOURCE_DIR}/Core/
-   # ${MATLAB_INCLUDE_DIR})
-   
-   # matlab_add_mex(
-    # NAME CPU_TNV
-    # SRC 
-      # ${CMAKE_SOURCE_DIR}/Wrappers/Matlab/mex_compile/regularisers_CPU/TNV.c 
-    # LINK_TO cilreg ${Matlab_LIBRARIES}
-    # )
-    
-# target_include_directories(CPU_TNV 
-   # PUBLIC ${CMAKE_SOURCE_DIR}/Core/regularisers_CPU
-   # ${CMAKE_SOURCE_DIR}/Core/regularisers_GPU
-   # ${CMAKE_SOURCE_DIR}/Core/inpainters_CPU
-   # ${CMAKE_SOURCE_DIR}/Core/
-   # ${MATLAB_INCLUDE_DIR})
-   
-#set (CPU_MEX_FILES "regularisers_CPU/TNV.c;regularisers_CPU/ROF_TV.c")
-#set (MEX_TARGETS "CPU_TNV;CPU_ROF")
-#list(APPEND MEX_TARGETS "CPU_TNV")
-#list(APPEND MEX_TARGETS "CPU_ROF")
-
-file(GLOB CPU_MEX_FILES
-    "${CMAKE_SOURCE_DIR}/Wrappers/Matlab/mex_compile/regularisers_CPU/*.c"
-    #"${CMAKE_SOURCE_DIR}/Wrappers/Matlab/mex_compile/regularisers_GPU/*.c"
-)
-
-#message("CPU_MEX_FILES " ${CPU_MEX_FILES})
-
-list(LENGTH CPU_MEX_FILES num)
-
-
-MATH(EXPR num "${num}-1")
-#set(num "-1")
-message("found ${num} files")
-
-foreach(tgt RANGE 0 ${num})
-  message("number " ${tgt})
-  list(LENGTH CPU_MEX_FILES num2)
-  message("the list is ${num2}")
-  #list(GET CPU_TARGETS ${tgt} current_target)
-  list(GET CPU_MEX_FILES ${tgt} current_file_name)
-  get_filename_component(current_file ${current_file_name} NAME)
-  string(REGEX MATCH "(.+).c" match ${current_file})
-  if (NOT ${match} EQUAL "" )
-  set (current_target ${CMAKE_MATCH_1})
-  endif()
-  message("matlab_add_mex target " ${current_file} " and " ${current_target})
-  matlab_add_mex(
-    NAME ${current_target}
-    SRC 
-      ${current_file_name} 
-            #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/FGP_TV_core.c
-	    #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/SB_TV_core.c
-	    #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/TGV_core.c
-	    #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/Diffusion_core.c
-	    #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/Diffus4th_order_core.c
-	    #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/LLT_ROF_core.c
-            #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/ROF_TV_core.c
-            #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/FGP_dTV_core.c
-            #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/TNV_core.c
-            #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/utils.c
-	    #${CMAKE_SOURCE_DIR}/Core/inpainters_CPU/Diffusion_Inpaint_core.c
-	    #${CMAKE_SOURCE_DIR}/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c
-    LINK_TO cilreg ${Matlab_LIBRARIES}
-    )
-    
-target_include_directories(${current_target}
-   PUBLIC ${CMAKE_SOURCE_DIR}/Core/regularisers_CPU
-   ${CMAKE_SOURCE_DIR}/Core/regularisers_GPU
-   ${CMAKE_SOURCE_DIR}/Core/inpainters_CPU
-   ${CMAKE_SOURCE_DIR}/Core/
-   ${MATLAB_INCLUDE_DIR})
-   set_property(TARGET ${current_target} PROPERTY C_STANDARD 99)
-   list(APPEND CPU_MEX_TARGETS ${current_target})
-   INSTALL(TARGETS ${current_target} DESTINATION "${MATLAB_DEST}")
-endforeach()
-   
-add_custom_target(MatlabWrapper DEPENDS ${CPU_MEX_TARGETS})
-
-if (BUILD_CUDA)
-    find_package(CUDA)
-    if (CUDA_FOUND)
-      file(GLOB GPU_MEX_FILES
-        "${CMAKE_SOURCE_DIR}/Wrappers/Matlab/mex_compile/regularisers_GPU/*.cpp"
-      )
-
-      list(LENGTH GPU_MEX_FILES num)
-message("number of GPU files  " ${num})
-
-      MATH(EXPR num "${num}-1")
-    #set(num "-1")
-
-      foreach(tgt RANGE ${num})
-        message("number " ${tgt})
-  list(LENGTH GPU_MEX_FILES num2)
-  message("the list is ${num2}")
-  #list(GET CPU_TARGETS ${tgt} current_target)
-  list(GET GPU_MEX_FILES ${tgt} current_file_name)
-  get_filename_component(current_file ${current_file_name} NAME)
-  string(REGEX MATCH "(.+).c" match ${current_file})
-  if (NOT ${match} EQUAL "" )
-  set (current_target ${CMAKE_MATCH_1})
-  endif()
-  message("matlab_add_mex target " ${current_file} " and " ${current_target})
-        message("matlab_add_mex " ${current_target})
-        matlab_add_mex(
-          NAME ${current_target}
-          SRC 
-            ${current_file_name} 
-          LINK_TO cilregcuda ${Matlab_LIBRARIES}
-          )
-        
-        target_include_directories(${current_target}
-        PUBLIC ${CMAKE_SOURCE_DIR}/Core/regularisers_CPU
-               ${CMAKE_SOURCE_DIR}/Core/regularisers_GPU
-               ${CMAKE_SOURCE_DIR}/Core/inpainters_CPU
-               ${CMAKE_SOURCE_DIR}/Core/
-               ${MATLAB_INCLUDE_DIR})
-       
-        list(APPEND GPU_MEX_TARGETS ${current_target})
-        INSTALL(TARGETS ${current_target} DESTINATION "${MATLAB_DEST}")
-      endforeach()
-       
-      add_custom_target(MatlabWrapperGPU DEPENDS ${GPU_MEX_TARGETS})
-      
-    endif()
-endif()
diff --git a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m
deleted file mode 100644
index 0c331a4..0000000
--- a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m
+++ /dev/null
@@ -1,178 +0,0 @@
-% Volume (3D) denoising demo using CCPi-RGL
-clear; close all
-Path1 = sprintf(['..' filesep 'mex_compile' filesep 'installed'], 1i);
-Path2 = sprintf(['..' filesep '..' filesep '..' filesep 'data' filesep], 1i);
-Path3 = sprintf(['..' filesep 'supp'], 1i);
-addpath(Path1);
-addpath(Path2);
-addpath(Path3);
-% Build a noisy volume and its noise-free reference by stacking one image.
-N = 512; 
-slices = 7;
-vol3D = zeros(N,N,slices, 'single');
-Ideal3D = zeros(N,N,slices, 'single');
-Im = double(imread('lena_gray_512.tif'))/255;  % loading image
-for i = 1:slices
-vol3D(:,:,i) = Im + .05*randn(size(Im)); 
-Ideal3D(:,:,i) = Im;
-end
-vol3D(vol3D < 0) = 0;
-figure; imshow(vol3D(:,:,slices), [0 1]); title('Noisy image'); % index must stay within 1..slices
-
-
-lambda_reg = 0.03; % regularisation parameter for all methods
-%%
-fprintf('Denoise a volume using the ROF-TV model (CPU) \n');
-tau_rof = 0.0025; % time-marching constant 
-iter_rof = 300; % number of ROF iterations
-tic; u_rof = ROF_TV(single(vol3D), lambda_reg, iter_rof, tau_rof); toc; 
-energyfunc_val_rof = TV_energy(single(u_rof),single(vol3D),lambda_reg, 1);  % get energy function value
-rmse_rof = (RMSE(Ideal3D(:),u_rof(:)));
-fprintf('%s %f \n', 'RMSE error for ROF is:', rmse_rof);
-figure; imshow(u_rof(:,:,7), [0 1]); title('ROF-TV denoised volume (CPU)');
-%%
-% fprintf('Denoise a volume using the ROF-TV model (GPU) \n');
-% tau_rof = 0.0025; % time-marching constant 
-% iter_rof = 300; % number of ROF iterations
-% tic; u_rofG = ROF_TV_GPU(single(vol3D), lambda_reg, iter_rof, tau_rof); toc;
-% rmse_rofG = (RMSE(Ideal3D(:),u_rofG(:)));
-% fprintf('%s %f \n', 'RMSE error for ROF is:', rmse_rofG);
-% figure; imshow(u_rofG(:,:,7), [0 1]); title('ROF-TV denoised volume (GPU)');
-%%
-fprintf('Denoise a volume using the FGP-TV model (CPU) \n');
-iter_fgp = 300; % number of FGP iterations
-epsil_tol =  1.0e-05; % tolerance
-tic; u_fgp = FGP_TV(single(vol3D), lambda_reg, iter_fgp, epsil_tol); toc; 
-energyfunc_val_fgp = TV_energy(single(u_fgp),single(vol3D),lambda_reg, 1); % get energy function value
-rmse_fgp = (RMSE(Ideal3D(:),u_fgp(:)));
-fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmse_fgp);
-figure; imshow(u_fgp(:,:,7), [0 1]); title('FGP-TV denoised volume (CPU)');
-%%
-% fprintf('Denoise a volume using the FGP-TV model (GPU) \n');
-% iter_fgp = 300; % number of FGP iterations
-% epsil_tol =  1.0e-05; % tolerance
-% tic; u_fgpG = FGP_TV_GPU(single(vol3D), lambda_reg, iter_fgp, epsil_tol); toc; 
-% rmse_fgpG = (RMSE(Ideal3D(:),u_fgpG(:)));
-% fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmse_fgpG);
-% figure; imshow(u_fgpG(:,:,7), [0 1]); title('FGP-TV denoised volume (GPU)');
-%%
-fprintf('Denoise a volume using the SB-TV model (CPU) \n');
-iter_sb = 150; % number of SB iterations
-epsil_tol =  1.0e-05; % tolerance
-tic; u_sb = SB_TV(single(vol3D), lambda_reg, iter_sb, epsil_tol); toc; 
-energyfunc_val_sb = TV_energy(single(u_sb),single(vol3D),lambda_reg, 1);  % get energy function value
-rmse_sb = (RMSE(Ideal3D(:),u_sb(:)));
-fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmse_sb);
-figure; imshow(u_sb(:,:,7), [0 1]); title('SB-TV denoised volume (CPU)');
-%%
-% fprintf('Denoise a volume using the SB-TV model (GPU) \n');
-% iter_sb = 150; % number of SB iterations
-% epsil_tol =  1.0e-05; % tolerance
-% tic; u_sbG = SB_TV_GPU(single(vol3D), lambda_reg, iter_sb, epsil_tol); toc; 
-% rmse_sbG = (RMSE(Ideal3D(:),u_sbG(:)));
-% fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmse_sbG);
-% figure; imshow(u_sbG(:,:,7), [0 1]); title('SB-TV denoised volume (GPU)');
-%%
-fprintf('Denoise a volume using the ROF-LLT model (CPU) \n');
-lambda_ROF = lambda_reg; % ROF regularisation parameter
-lambda_LLT = lambda_reg*0.35; % LLT regularisation parameter
-iter_LLT = 300; % iterations 
-tau_rof_llt = 0.0025; % time-marching constant 
-tic; u_rof_llt = LLT_ROF(single(vol3D), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
-rmse_rof_llt = (RMSE(Ideal3D(:),u_rof_llt(:)));
-fprintf('%s %f \n', 'RMSE error for ROF-LLT is:', rmse_rof_llt);
-figure; imshow(u_rof_llt(:,:,7), [0 1]); title('ROF-LLT denoised volume (CPU)');
-%%
-% fprintf('Denoise a volume using the ROF-LLT model (GPU) \n');
-% lambda_ROF = lambda_reg; % ROF regularisation parameter
-% lambda_LLT = lambda_reg*0.35; % LLT regularisation parameter
-% iter_LLT = 300; % iterations 
-% tau_rof_llt = 0.0025; % time-marching constant 
-% tic; u_rof_llt_g = LLT_ROF_GPU(single(vol3D), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
-% rmse_rof_llt = (RMSE(Ideal3D(:),u_rof_llt_g(:)));
-% fprintf('%s %f \n', 'RMSE error for ROF-LLT is:', rmse_rof_llt);
-% figure; imshow(u_rof_llt_g(:,:,7), [0 1]); title('ROF-LLT denoised volume (GPU)');
-%%
-fprintf('Denoise a volume using Nonlinear-Diffusion model (CPU) \n');
-iter_diff = 300; % number of diffusion iterations
-lambda_regDiff = 0.025; % regularisation for the diffusivity 
-sigmaPar = 0.015; % edge-preserving parameter
-tau_param = 0.025; % time-marching constant 
-tic; u_diff = NonlDiff(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
-rmse_diff = (RMSE(Ideal3D(:),u_diff(:)));
-fprintf('%s %f \n', 'RMSE error for Diffusion is:', rmse_diff);
-figure; imshow(u_diff(:,:,7), [0 1]); title('Diffusion denoised volume (CPU)');
-%%
-% fprintf('Denoise a volume using Nonlinear-Diffusion model (GPU) \n');
-% iter_diff = 300; % number of diffusion iterations
-% lambda_regDiff = 0.025; % regularisation for the diffusivity 
-% sigmaPar = 0.015; % edge-preserving parameter
-% tau_param = 0.025; % time-marching constant 
-% tic; u_diff_g = NonlDiff_GPU(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
-% rmse_diff = (RMSE(Ideal3D(:),u_diff_g(:)));
-% fprintf('%s %f \n', 'RMSE error for Diffusion is:', rmse_diff);
-% figure; imshow(u_diff_g(:,:,7), [0 1]); title('Diffusion denoised volume (GPU)');
-%%
-fprintf('Denoise using Fourth-order anisotropic diffusion model (CPU) \n');
-iter_diff = 300; % number of diffusion iterations
-lambda_regDiff = 3.5; % regularisation for the diffusivity 
-sigmaPar = 0.02; % edge-preserving parameter
-tau_param = 0.0015; % time-marching constant 
-tic; u_diff4 = Diffusion_4thO(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
-rmse_diff4 = (RMSE(Ideal3D(:),u_diff4(:)));
-fprintf('%s %f \n', 'RMSE error for Anis.Diff of 4th order is:', rmse_diff4);
-figure; imshow(u_diff4(:,:,7), [0 1]); title('Diffusion 4thO denoised volume (CPU)');
-%%
-% fprintf('Denoise using Fourth-order anisotropic diffusion model (GPU) \n');
-% iter_diff = 300; % number of diffusion iterations
-% lambda_regDiff = 3.5; % regularisation for the diffusivity 
-% sigmaPar = 0.02; % edge-preserving parameter
-% tau_param = 0.0015; % time-marching constant 
-% tic; u_diff4_g = Diffusion_4thO_GPU(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
-% rmse_diff4 = (RMSE(Ideal3D(:),u_diff4_g(:)));
-% fprintf('%s %f \n', 'RMSE error for Anis.Diff of 4th order is:', rmse_diff4);
-% figure; imshow(u_diff4_g(:,:,7), [0 1]); title('Diffusion 4thO denoised volume (GPU)');
-%%
-fprintf('Denoise using the TGV model (CPU) \n');
-lambda_TGV = 0.03; % regularisation parameter
-alpha1 = 1.0; % parameter to control the first-order term
-alpha0 = 2.0; % parameter to control the second-order term
-iter_TGV = 500; % number of Primal-Dual iterations for TGV
-tic; u_tgv = TGV(single(vol3D), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
-rmseTGV = RMSE(Ideal3D(:),u_tgv(:));
-fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV);
-figure; imshow(u_tgv(:,:,3), [0 1]); title('TGV denoised volume (CPU)');
-%%
-%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< %
-fprintf('Denoise a volume using the FGP-dTV model (CPU) \n');
-
-% create another volume (reference) with slightly less amount of noise
-vol3D_ref = zeros(N,N,slices, 'single');
-for i = 1:slices
-vol3D_ref(:,:,i) = Im + .01*randn(size(Im)); 
-end
-vol3D_ref(vol3D_ref < 0) = 0;
-% vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
-
-iter_fgp = 300; % number of FGP iterations
-epsil_tol =  1.0e-05; % tolerance
-eta =  0.2; % Reference image gradient smoothing constant
-tic; u_fgp_dtv = FGP_dTV(single(vol3D), single(vol3D_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
-figure; imshow(u_fgp_dtv(:,:,7), [0 1]); title('FGP-dTV denoised volume (CPU)');
-%%
-fprintf('Denoise a volume using the FGP-dTV model (GPU) \n');
-
-% create another volume (reference) with slightly less amount of noise
-vol3D_ref = zeros(N,N,slices, 'single');
-for i = 1:slices
-vol3D_ref(:,:,i) = Im + .01*randn(size(Im)); 
-end
-vol3D_ref(vol3D_ref < 0) = 0;
-% vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
-
-iter_fgp = 300; % number of FGP iterations
-epsil_tol =  1.0e-05; % tolerance
-eta =  0.2; % Reference image gradient smoothing constant
-tic; u_fgp_dtv_g = FGP_dTV_GPU(single(vol3D), single(vol3D_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
-figure; imshow(u_fgp_dtv_g(:,:,7), [0 1]); title('FGP-dTV denoised volume (GPU)');
-%%
diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m b/Wrappers/Matlab/demos/demoMatlab_denoise.m
deleted file mode 100644
index 14d3096..0000000
--- a/Wrappers/Matlab/demos/demoMatlab_denoise.m
+++ /dev/null
@@ -1,189 +0,0 @@
-% Image (2D) denoising demo using CCPi-RGL
-clear; close all
-fsep = '/';
-
-Path1 = sprintf(['..' fsep 'mex_compile' fsep 'installed'], 1i);
-Path2 = sprintf(['..' fsep '..' fsep '..' fsep 'data' fsep], 1i);
-Path3 = sprintf(['..' fsep 'supp'], 1i);
-addpath(Path1); addpath(Path2); addpath(Path3);
-
-Im = double(imread('lena_gray_512.tif'))/255;  % loading image
-u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
-figure; imshow(u0, [0 1]); title('Noisy image');
-
-lambda_reg = 0.03; % regularsation parameter for all methods
-%%
-fprintf('Denoise using the ROF-TV model (CPU) \n');
-tau_rof = 0.0025; % time-marching constant 
-iter_rof = 750; % number of ROF iterations
-tic; u_rof = ROF_TV(single(u0), lambda_reg, iter_rof, tau_rof); toc; 
-energyfunc_val_rof = TV_energy(single(u_rof),single(u0),lambda_reg, 1);  % get energy function value
-rmseROF = (RMSE(u_rof(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for ROF-TV is:', rmseROF);
-figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)');
-%%
-% fprintf('Denoise using the ROF-TV model (GPU) \n');
-% tau_rof = 0.0025; % time-marching constant 
-% iter_rof = 750; % number of ROF iterations
-% tic; u_rofG = ROF_TV_GPU(single(u0), lambda_reg, iter_rof, tau_rof); toc;
-% figure; imshow(u_rofG, [0 1]); title('ROF-TV denoised image (GPU)');
-%%
-fprintf('Denoise using the FGP-TV model (CPU) \n');
-iter_fgp = 1000; % number of FGP iterations
-epsil_tol =  1.0e-06; % tolerance
-tic; u_fgp = FGP_TV(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; 
-energyfunc_val_fgp = TV_energy(single(u_fgp),single(u0),lambda_reg, 1); % get energy function value
-rmseFGP = (RMSE(u_fgp(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmseFGP);
-figure; imshow(u_fgp, [0 1]); title('FGP-TV denoised image (CPU)');
-
-%%
-% fprintf('Denoise using the FGP-TV model (GPU) \n');
-% iter_fgp = 1000; % number of FGP iterations
-% epsil_tol =  1.0e-05; % tolerance
-% tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; 
-% figure; imshow(u_fgpG, [0 1]); title('FGP-TV denoised image (GPU)');
-%%
-fprintf('Denoise using the SB-TV model (CPU) \n');
-iter_sb = 150; % number of SB iterations
-epsil_tol =  1.0e-06; % tolerance
-tic; u_sb = SB_TV(single(u0), lambda_reg, iter_sb, epsil_tol); toc; 
-energyfunc_val_sb = TV_energy(single(u_sb),single(u0),lambda_reg, 1);  % get energy function value
-rmseSB = (RMSE(u_sb(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmseSB);
-figure; imshow(u_sb, [0 1]); title('SB-TV denoised image (CPU)');
-%%
-% fprintf('Denoise using the SB-TV model (GPU) \n');
-% iter_sb = 150; % number of SB iterations
-% epsil_tol =  1.0e-06; % tolerance
-% tic; u_sbG = SB_TV_GPU(single(u0), lambda_reg, iter_sb, epsil_tol); toc; 
-% figure; imshow(u_sbG, [0 1]); title('SB-TV denoised image (GPU)');
-%%
-fprintf('Denoise using the TGV model (CPU) \n');
-lambda_TGV = 0.045; % regularisation parameter
-alpha1 = 1.0; % parameter to control the first-order term
-alpha0 = 2.0; % parameter to control the second-order term
-iter_TGV = 2000; % number of Primal-Dual iterations for TGV
-tic; u_tgv = TGV(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
-rmseTGV = (RMSE(u_tgv(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV);
-figure; imshow(u_tgv, [0 1]); title('TGV denoised image (CPU)');
-%%
-% fprintf('Denoise using the TGV model (GPU) \n');
-% lambda_TGV = 0.045; % regularisation parameter
-% alpha1 = 1.0; % parameter to control the first-order term
-% alpha0 = 2.0; % parameter to control the second-order term
-% iter_TGV = 2000; % number of Primal-Dual iterations for TGV
-% tic; u_tgv_gpu = TGV_GPU(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
-% rmseTGV_gpu = (RMSE(u_tgv_gpu(:),Im(:)));
-% fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV_gpu);
-% figure; imshow(u_tgv_gpu, [0 1]); title('TGV denoised image (GPU)');
-%%
-fprintf('Denoise using the ROF-LLT model (CPU) \n');
-lambda_ROF = lambda_reg; % ROF regularisation parameter
-lambda_LLT = lambda_reg*0.45; % LLT regularisation parameter
-iter_LLT = 500; % iterations (same count as the GPU variant of this cell)
-tau_rof_llt = 0.0025; % time-marching constant 
-tic; u_rof_llt = LLT_ROF(single(u0), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
-rmseROFLLT = (RMSE(u_rof_llt(:),Im(:))); % error against the noise-free image
-fprintf('%s %f \n', 'RMSE error for ROF-LLT is:', rmseROFLLT);
-figure; imshow(u_rof_llt, [0 1]); title('ROF-LLT denoised image (CPU)');
-%%
-% fprintf('Denoise using the ROF-LLT model (GPU) \n');
-% lambda_ROF = lambda_reg; % ROF regularisation parameter
-% lambda_LLT = lambda_reg*0.45; % LLT regularisation parameter
-% iter_LLT = 500; % iterations 
-% tau_rof_llt = 0.0025; % time-marching constant 
-% tic; u_rof_llt_g = LLT_ROF_GPU(single(u0), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
-% rmseROFLLT_g = (RMSE(u_rof_llt_g(:),Im(:)));
-% fprintf('%s %f \n', 'RMSE error for TGV is:', rmseROFLLT_g);
-% figure; imshow(u_rof_llt_g, [0 1]); title('ROF-LLT denoised image (GPU)');
-%%
-fprintf('Denoise using Nonlinear-Diffusion model (CPU) \n');
-iter_diff = 800; % number of diffusion iterations
-lambda_regDiff = 0.025; % regularisation for the diffusivity 
-sigmaPar = 0.015; % edge-preserving parameter
-tau_param = 0.025; % time-marching constant 
-tic; u_diff = NonlDiff(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
-rmseDiffus = (RMSE(u_diff(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for Nonlinear Diffusion is:', rmseDiffus);
-figure; imshow(u_diff, [0 1]); title('Diffusion denoised image (CPU)');
-%%
-% fprintf('Denoise using Nonlinear-Diffusion model (GPU) \n');
-% iter_diff = 800; % number of diffusion iterations
-% lambda_regDiff = 0.025; % regularisation for the diffusivity 
-% sigmaPar = 0.015; % edge-preserving parameter
-% tau_param = 0.025; % time-marching constant 
-% tic; u_diff_g = NonlDiff_GPU(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
-% figure; imshow(u_diff_g, [0 1]); title('Diffusion denoised image (GPU)');
-%%
-fprintf('Denoise using Fourth-order anisotropic diffusion model (CPU) \n');
-iter_diff = 800; % number of diffusion iterations
-lambda_regDiff = 3.5; % regularisation for the diffusivity 
-sigmaPar = 0.02; % edge-preserving parameter
-tau_param = 0.0015; % time-marching constant 
-tic; u_diff4 = Diffusion_4thO(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
-rmseDiffHO = (RMSE(u_diff4(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for Fourth-order anisotropic diffusion is:', rmseDiffHO);
-figure; imshow(u_diff4, [0 1]); title('Diffusion 4thO denoised image (CPU)');
-%%
-% fprintf('Denoise using Fourth-order anisotropic diffusion model (GPU) \n');
-% iter_diff = 800; % number of diffusion iterations
-% lambda_regDiff = 3.5; % regularisation for the diffusivity 
-% sigmaPar = 0.02; % edge-preserving parameter
-% tau_param = 0.0015; % time-marching constant 
-% tic; u_diff4_g = Diffusion_4thO_GPU(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
-% figure; imshow(u_diff4_g, [0 1]); title('Diffusion 4thO denoised image (GPU)');
-%%
-fprintf('Weights pre-calculation for Non-local TV (takes time on CPU) \n');
-SearchingWindow = 7;
-PatchWindow = 2;
-NeighboursNumber = 20; % the number of neibours to include
-h = 0.23; % edge related parameter for NLM
-tic; [H_i, H_j, Weights] = PatchSelect(single(u0), SearchingWindow, PatchWindow, NeighboursNumber, h); toc;
-%%
-fprintf('Denoise using Non-local Total Variation (CPU) \n');
-iter_nltv = 3; % number of nltv iterations
-lambda_nltv = 0.05; % regularisation parameter for nltv
-tic; u_nltv = Nonlocal_TV(single(u0), H_i, H_j, 0, Weights, lambda_nltv, iter_nltv); toc; 
-rmse_nltv = (RMSE(u_nltv(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for Non-local Total Variation is:', rmse_nltv);
-figure; imagesc(u_nltv, [0 1]); colormap(gray); daspect([1 1 1]); title('Non-local Total Variation denoised image (CPU)');
-%%
-%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< %
-
-fprintf('Denoise using the FGP-dTV model (CPU) \n');
-% create another image (reference) with slightly less amount of noise
-u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0;
-% u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
-
-iter_fgp = 1000; % number of FGP iterations
-epsil_tol =  1.0e-06; % tolerance
-eta =  0.2; % Reference image gradient smoothing constant
-tic; u_fgp_dtv = FGP_dTV(single(u0), single(u_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
-rmse_dTV= (RMSE(u_fgp_dtv(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for Directional Total Variation (dTV) is:', rmse_dTV);
-figure; imshow(u_fgp_dtv, [0 1]); title('FGP-dTV denoised image (CPU)');
-%%
-% fprintf('Denoise using the FGP-dTV model (GPU) \n');
-% % create another image (reference) with slightly less amount of noise
-% u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0;
-% % u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
-% 
-% iter_fgp = 1000; % number of FGP iterations
-% epsil_tol =  1.0e-06; % tolerance
-% eta =  0.2; % Reference image gradient smoothing constant
-% tic; u_fgp_dtvG = FGP_dTV_GPU(single(u0), single(u_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
-% figure; imshow(u_fgp_dtvG, [0 1]); title('FGP-dTV denoised image (GPU)');
-%%
-fprintf('Denoise using the TNV prior (CPU) \n');
-slices = 5; N = 512;
-vol3D = zeros(N,N,slices, 'single');
-for i = 1:slices
-vol3D(:,:,i) = Im + .05*randn(size(Im)); 
-end
-vol3D(vol3D < 0) = 0;
-
-iter_tnv = 200; % number of TNV iterations
-tic; u_tnv = TNV(single(vol3D), lambda_reg, iter_tnv); toc; 
-figure; imshow(u_tnv(:,:,3), [0 1]); title('TNV denoised stack of channels (CPU)');
diff --git a/Wrappers/Matlab/demos/demoMatlab_inpaint.m b/Wrappers/Matlab/demos/demoMatlab_inpaint.m
deleted file mode 100644
index 66f9c15..0000000
--- a/Wrappers/Matlab/demos/demoMatlab_inpaint.m
+++ /dev/null
@@ -1,35 +0,0 @@
-% Image (2D) inpainting demo using CCPi-RGL
-clear; close all
-Path1 = sprintf(['..' filesep 'mex_compile' filesep 'installed'], 1i);
-Path2 = sprintf(['..' filesep '..' filesep '..' filesep 'data' filesep], 1i);
-addpath(Path1);
-addpath(Path2);
-
-load('SinoInpaint.mat');
-Sinogram = Sinogram./max(Sinogram(:));
-Sino_mask = Sinogram.*(1-single(Mask));
-figure; 
-subplot(1,2,1); imshow(Sino_mask, [0 1]); title('Missing data sinogram');
-subplot(1,2,2); imshow(Mask, [0 1]); title('Mask');
-%%
-fprintf('Inpaint using Linear-Diffusion model (CPU) \n');
-iter_diff = 5000; % number of diffusion iterations
-lambda_regDiff = 6000; % regularisation for the diffusivity 
-sigmaPar = 0.0; % edge-preserving parameter
-tau_param = 0.000075; % time-marching constant 
-tic; u_diff = NonlDiff_Inp(single(Sino_mask), Mask, lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
-figure; imshow(u_diff, [0 1]); title('Linear-Diffusion inpainted sinogram (CPU)');
-%%
-fprintf('Inpaint using Nonlinear-Diffusion model (CPU) \n');
-iter_diff = 1500; % number of diffusion iterations
-lambda_regDiff = 80; % regularisation for the diffusivity 
-sigmaPar = 0.00009; % edge-preserving parameter
-tau_param = 0.000008; % time-marching constant 
-tic; u_diff = NonlDiff_Inp(single(Sino_mask), Mask, lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
-figure; imshow(u_diff, [0 1]); title('Non-Linear Diffusion inpainted sinogram (CPU)');
-%%
-fprintf('Inpaint using Nonlocal Vertical Marching model (CPU) \n');
-Increment = 1; % linear increment for the searching window
-tic; [u_nom,maskupd] = NonlocalMarching_Inpaint(single(Sino_mask), Mask, Increment); toc;
-figure; imshow(u_nom, [0 1]); title('NVM inpainted sinogram (CPU)');
-%%
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m b/Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m
deleted file mode 100644
index 72a828e..0000000
--- a/Wrappers/Matlab/mex_compile/compileCPU_mex_Linux.m
+++ /dev/null
@@ -1,81 +0,0 @@
-% execute this mex file on Linux in Matlab once
-
-fsep = '/';
-
-pathcopyFrom = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'regularisers_CPU'], 1i);
-pathcopyFrom1 = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'CCPiDefines.h'], 1i);
-pathcopyFrom2 = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'inpainters_CPU'], 1i);
-
-copyfile(pathcopyFrom, 'regularisers_CPU');
-copyfile(pathcopyFrom1, 'regularisers_CPU');
-copyfile(pathcopyFrom2, 'regularisers_CPU');
-
-cd regularisers_CPU
-
-Pathmove = sprintf(['..' fsep 'installed' fsep], 1i);
-
-fprintf('%s \n', '<<<<<<<<<<<Compiling CPU regularisers>>>>>>>>>>>>>');
-
-fprintf('%s \n', 'Compiling ROF-TV...');
-mex ROF_TV.c ROF_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-movefile('ROF_TV.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling FGP-TV...');
-mex FGP_TV.c FGP_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-movefile('FGP_TV.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling SB-TV...');
-mex SB_TV.c SB_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-movefile('SB_TV.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling dFGP-TV...');
-mex FGP_dTV.c FGP_dTV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-movefile('FGP_dTV.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling TNV...');
-mex TNV.c TNV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-movefile('TNV.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling NonLinear Diffusion...');
-mex NonlDiff.c Diffusion_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-movefile('NonlDiff.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling Anisotropic diffusion of higher order...');
-mex Diffusion_4thO.c Diffus4th_order_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-movefile('Diffusion_4thO.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling TGV...');
-mex TGV.c TGV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-movefile('TGV.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling ROF-LLT...');
-mex LLT_ROF.c LLT_ROF_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-movefile('LLT_ROF.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling NonLocal-TV...');
-mex PatchSelect.c PatchSelect_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-mex Nonlocal_TV.c Nonlocal_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-movefile('Nonlocal_TV.mex*',Pathmove);
-movefile('PatchSelect.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling additional tools...');
-mex TV_energy.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-movefile('TV_energy.mex*',Pathmove);
-
-%############Inpainters##############%
-fprintf('%s \n', 'Compiling Nonlinear/Linear diffusion inpainting...');
-mex NonlDiff_Inp.c Diffusion_Inpaint_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-movefile('NonlDiff_Inp.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling Nonlocal marching method for inpainting...');
-mex NonlocalMarching_Inpaint.c NonlocalMarching_Inpaint_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-movefile('NonlocalMarching_Inpaint.mex*',Pathmove);
-
-delete SB_TV_core* ROF_TV_core* FGP_TV_core* FGP_dTV_core* TNV_core* utils* Diffusion_core* Diffus4th_order_core* TGV_core* LLT_ROF_core* CCPiDefines.h
-delete PatchSelect_core* Nonlocal_TV_core*
-delete Diffusion_Inpaint_core* NonlocalMarching_Inpaint_core*
-fprintf('%s \n', '<<<<<<< Regularisers successfully compiled! >>>>>>>');
-
-pathA2 = sprintf(['..' fsep '..' fsep], 1i);
-cd(pathA2);
-cd demos
diff --git a/Wrappers/Matlab/mex_compile/compileCPU_mex_WINDOWS.m b/Wrappers/Matlab/mex_compile/compileCPU_mex_WINDOWS.m
deleted file mode 100644
index 6f7541c..0000000
--- a/Wrappers/Matlab/mex_compile/compileCPU_mex_WINDOWS.m
+++ /dev/null
@@ -1,135 +0,0 @@
-% execute this mex file on Windows in Matlab once
-
-% >>>>>>>>>>>>>>>>>>>>>>>>>>>>>
-% I've been able to compile on Windows 7 with MinGW and Matlab 2016b, however, 
-% not sure if openmp is enabled after the compilation. 
-
-% Here I present two ways how software can be compiled, if you have some
-% other suggestions/remarks please contact me at dkazanc@hotmail.com 
-% >>>>>>>>>>>>>>>>>>>>>>>>>>>>>
-
-fsep = '/';
-
-pathcopyFrom = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'regularisers_CPU'], 1i);
-pathcopyFrom1 = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'CCPiDefines.h'], 1i);
-pathcopyFrom2 = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'inpainters_CPU'], 1i);
-
-copyfile(pathcopyFrom, 'regularisers_CPU');
-copyfile(pathcopyFrom1, 'regularisers_CPU');
-copyfile(pathcopyFrom2, 'regularisers_CPU');
-
-cd regularisers_CPU
-
-Pathmove = sprintf(['..' fsep 'installed' fsep], 1i);
-
-fprintf('%s \n', '<<<<<<<<<<<Compiling CPU regularisers>>>>>>>>>>>>>');
-
-fprintf('%s \n', 'Compiling ROF-TV...');
-mex ROF_TV.c ROF_TV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-movefile('ROF_TV.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling FGP-TV...');
-mex FGP_TV.c FGP_TV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-movefile('FGP_TV.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling SB-TV...');
-mex SB_TV.c SB_TV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-movefile('SB_TV.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling dFGP-TV...');
-mex FGP_dTV.c FGP_dTV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-movefile('FGP_dTV.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling TNV...');
-mex TNV.c TNV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-movefile('TNV.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling NonLinear Diffusion...');
-mex NonlDiff.c Diffusion_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-movefile('NonlDiff.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling Anisotropic diffusion of higher order...');
-mex Diffusion_4thO.c Diffus4th_order_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-movefile('Diffusion_4thO.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling TGV...');
-mex TGV.c TGV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-movefile('TGV.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling ROF-LLT...');
-mex LLT_ROF.c LLT_ROF_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-movefile('LLT_ROF.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling NonLocal-TV...');
-mex PatchSelect.c PatchSelect_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-mex Nonlocal_TV.c Nonlocal_TV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-movefile('Nonlocal_TV.mex*',Pathmove);
-movefile('PatchSelect.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling additional tools...');
-mex TV_energy.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-movefile('TV_energy.mex*',Pathmove);
-
-%############Inpainters##############%
-fprintf('%s \n', 'Compiling Nonlinear/Linear diffusion inpainting...');
-mex NonlDiff_Inp.c Diffusion_Inpaint_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-movefile('NonlDiff_Inp.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling Nonlocal marching method for inpaiting...');
-mex NonlocalMarching_Inpaint.c NonlocalMarching_Inpaint_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
-movefile('NonlocalMarching_Inpaint.mex*',Pathmove);
-
-
-%%
-%%% The second approach to compile using TDM-GCC which follows this
-%%% discussion:
-%%% https://uk.mathworks.com/matlabcentral/answers/279171-using-mingw-compiler-and-open-mp#comment_359122
-%%% 1. Install TDM-GCC independently from http://tdm-gcc.tdragon.net/ (I installed 5.1.0)
-%%% Install openmp version: http://sourceforge.net/projects/tdm-gcc/files/TDM-GCC%205%20series/5.1.0-tdm64-1/gcc-5.1.0-tdm64-1-openmp.zip/download
-%%% 2. Link to libgomp.a in that installation when compiling your mex file.
-
-%%% assuming you unzipped TDM GCC (OpenMp) in folder TDMGCC on C drive, uncomment
-%%% the lines below
-% fprintf('%s \n', 'Compiling CPU regularisers...');
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" ROF_TV.c ROF_TV_core.c utils.c
-% movefile('ROF_TV.mex*',Pathmove);
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" FGP_TV.c FGP_TV_core.c utils.c
-% movefile('FGP_TV.mex*',Pathmove);
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" SB_TV.c SB_TV_core.c utils.c
-% movefile('SB_TV.mex*',Pathmove);
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" FGP_dTV.c FGP_dTV_core.c utils.c
-% movefile('FGP_dTV.mex*',Pathmove);
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" TNV.c TNV_core.c utils.c
-% movefile('TNV.mex*',Pathmove);
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" NonlDiff.c Diffusion_core.c utils.c
-% movefile('NonlDiff.mex*',Pathmove);
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" Diffusion_4thO.c Diffus4th_order_core.c utils.c
-% movefile('Diffusion_4thO.mex*',Pathmove);
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" TGV.c TGV_core.c utils.c
-% movefile('TGV.mex*',Pathmove);
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" LLT_ROF.c LLT_ROF_core.c utils.c
-% movefile('LLT_ROF.mex*',Pathmove);
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" PatchSelect.c PatchSelect_core.c utils.c
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" Nonlocal_TV.c Nonlocal_TV_core.c utils.c
-% movefile('Nonlocal_TV.mex*',Pathmove);
-% movefile('PatchSelect.mex*',Pathmove);
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" TV_energy.c utils.c
-% movefile('TV_energy.mex*',Pathmove);
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" NonlDiff_Inp.c Diffusion_Inpaint_core.c utils.c
-% movefile('NonlDiff_Inp.mex*',Pathmove);
-% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" NonlocalMarching_Inpaint.c NonlocalMarching_Inpaint_core.c utils.c
-% movefile('NonlocalMarching_Inpaint.mex*',Pathmove);
-
-% Remove the core sources and headers that were copied in for compilation.
-delete SB_TV_core* ROF_TV_core* FGP_TV_core* FGP_dTV_core* TNV_core* utils* Diffusion_core* Diffus4th_order_core* TGV_core* LLT_ROF_core* CCPiDefines.h
-delete PatchSelect_core* Nonlocal_TV_core*
-delete Diffusion_Inpaint_core* NonlocalMarching_Inpaint_core*
-fprintf('%s \n', 'Regularisers successfully compiled!');
-
-
-%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-%pathA2 = sprintf(['..' fsep '..' fsep], 1i);
-%cd(pathA2);
-%cd demos
diff --git a/Wrappers/Matlab/mex_compile/compileGPU_mex.m b/Wrappers/Matlab/mex_compile/compileGPU_mex.m
deleted file mode 100644
index dd1475c..0000000
--- a/Wrappers/Matlab/mex_compile/compileGPU_mex.m
+++ /dev/null
@@ -1,74 +0,0 @@
-% execute this mex file in Matlab once
-
-%>>>>>>>>>>>>>>>>>Important<<<<<<<<<<<<<<<<<<<
-% In order to compile CUDA modules one needs to have nvcc-compiler
-% installed (see CUDA SDK), check it under MATLAB with !nvcc --version
-
-% In the code bellow we provide a full explicit path to nvcc compiler 
-% ! paths to matlab and CUDA sdk can be different, modify accordingly !
-
-% Tested on Ubuntu 18.04/MATLAB 2016b/cuda10.0/gcc7.3
-
-% Installation HAS NOT been tested on Windows, please you Cmake build or
-% modify the code bellow accordingly
-fsep = '/';
-
-pathcopyFrom = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'regularisers_GPU'], 1i);
-pathcopyFrom1 = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'CCPiDefines.h'], 1i);
-
-copyfile(pathcopyFrom, 'regularisers_GPU');
-copyfile(pathcopyFrom1, 'regularisers_GPU');
-
-cd regularisers_GPU
-
-Pathmove = sprintf(['..' fsep 'installed' fsep], 1i);
-
-fprintf('%s \n', '<<<<<<<<<<<Compiling GPU regularisers (CUDA)>>>>>>>>>>>>>');
-
-fprintf('%s \n', 'Compiling ROF-TV...');
-!/usr/local/cuda/bin/nvcc -O0 -c TV_ROF_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu ROF_TV_GPU.cpp TV_ROF_GPU_core.o
-movefile('ROF_TV_GPU.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling FGP-TV...');
-!/usr/local/cuda/bin/nvcc -O0 -c TV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu FGP_TV_GPU.cpp TV_FGP_GPU_core.o
-movefile('FGP_TV_GPU.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling SB-TV...');
-!/usr/local/cuda/bin/nvcc -O0 -c TV_SB_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu SB_TV_GPU.cpp TV_SB_GPU_core.o
-movefile('SB_TV_GPU.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling TGV...');
-!/usr/local/cuda/bin/nvcc -O0 -c TGV_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu TGV_GPU.cpp TGV_GPU_core.o
-movefile('TGV_GPU.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling dFGP-TV...');
-!/usr/local/cuda/bin/nvcc -O0 -c dTV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu FGP_dTV_GPU.cpp dTV_FGP_GPU_core.o
-movefile('FGP_dTV_GPU.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling NonLinear Diffusion...');
-!/usr/local/cuda/bin/nvcc -O0 -c NonlDiff_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu NonlDiff_GPU.cpp NonlDiff_GPU_core.o
-movefile('NonlDiff_GPU.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling Anisotropic diffusion of higher order...');
-!/usr/local/cuda/bin/nvcc -O0 -c Diffus_4thO_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu Diffusion_4thO_GPU.cpp Diffus_4thO_GPU_core.o
-movefile('Diffusion_4thO_GPU.mex*',Pathmove);
-
-fprintf('%s \n', 'Compiling ROF-LLT...');
-!/usr/local/cuda/bin/nvcc -O0 -c LLT_ROF_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu LLT_ROF_GPU.cpp LLT_ROF_GPU_core.o
-movefile('LLT_ROF_GPU.mex*',Pathmove);
-
-
-delete TV_ROF_GPU_core* TV_FGP_GPU_core* TV_SB_GPU_core* dTV_FGP_GPU_core* NonlDiff_GPU_core* Diffus_4thO_GPU_core* TGV_GPU_core* LLT_ROF_GPU_core* CCPiDefines.h
-fprintf('%s \n', 'All successfully compiled!');
-
-pathA2 = sprintf(['..' fsep '..' fsep], 1i);
-cd(pathA2);
-cd demos
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/installed/MEXed_files_location.txt b/Wrappers/Matlab/mex_compile/installed/MEXed_files_location.txt
deleted file mode 100644
index e69de29..0000000
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/Diffusion_4thO.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/Diffusion_4thO.c
deleted file mode 100644
index 66ea9be..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/Diffusion_4thO.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "Diffus4th_order_core.h"
-
-/* C-OMP implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
- * The minimisation is performed using explicit scheme. 
- *
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
- * 3. Edge-preserving parameter (sigma) [REQUIRED]
- * 4. Number of iterations, for explicit scheme >= 150 is recommended [OPTIONAL, default 300]
- * 5. tau - time-marching step for the explicit scheme [OPTIONAL, default 0.015]
- *
- * Output:
- * [1] Regularized image/volume 
- *
- * This function is based on the paper by
- * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
- */
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter_numb;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    float *Input, *Output=NULL, lambda, tau, sigma;
-    
-    dim_array = mxGetDimensions(prhs[0]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    Input  = (float *) mxGetData(prhs[0]);
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    sigma = (float) mxGetScalar(prhs[2]); /* Edge-preserving parameter */
-    iter_numb = 300; /* iterations number */
-    tau = 0.01; /* marching step parameter */
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    if ((nrhs < 3) || (nrhs > 5)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant");
-    if ((nrhs == 4) || (nrhs == 5))  iter_numb = (int) mxGetScalar(prhs[3]); /* iterations number */
-    if (nrhs == 5)  tau =  (float) mxGetScalar(prhs[4]); /* marching step parameter */
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    /* output arrays*/
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        /* output image/volume */
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    
-    Diffus4th_CPU_main(Input, Output, lambda, sigma, iter_numb, tau, dimX, dimY, dimZ);
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c
deleted file mode 100644
index 642362f..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "FGP_TV_core.h"
-
-/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
- *
- * Input Parameters:
- * 1. Noisy image/volume
- * 2. lambdaPar - regularization parameter
- * 3. Number of iterations
- * 4. eplsilon: tolerance constant
- * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
- * 6. nonneg: 'nonnegativity (0 is OFF by default)
- * 7. print information: 0 (off) or 1 (on)
- *
- * Output:
- * [1] Filtered/regularized image
- *
- * This function is based on the Matlab's code and paper by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- */
-
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, methTV, printswitch, nonneg;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    float *Input, *Output=NULL, lambda, epsil;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 7)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
-    
-    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter = 300; /* default iterations number */
-    epsil = 0.0001; /* default tolerance constant */
-    methTV = 0;  /* default isotropic TV penalty */
-    nonneg = 0; /* default nonnegativity switch, off - 0 */
-    printswitch = 0; /*default print is switched, off - 0 */
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6) || (nrhs == 7))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
-    if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6) || (nrhs == 7))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
-    if ((nrhs == 5) || (nrhs == 6) || (nrhs == 7))  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-        mxFree(penalty_type);
-    }
-    if ((nrhs == 6) || (nrhs == 7))  {
-        nonneg = (int) mxGetScalar(prhs[5]);
-        if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
-    }
-    if (nrhs == 7)  {
-        printswitch = (int) mxGetScalar(prhs[6]);
-        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
-    }
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    
-    /* running the function */
-    TV_FGP_CPU_main(Input, Output, lambda, iter, epsil, methTV, nonneg, printswitch, dimX, dimY, dimZ);
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c
deleted file mode 100644
index 1a0c070..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "FGP_dTV_core.h"
-
-/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
- * which employs structural similarity of the level sets of two images/volumes, see [1,2]
- * The current implementation updates image 1 while image 2 is being fixed.
- *
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
- * 3. lambdaPar - regularization parameter [REQUIRED]
- * 4. Number of iterations [OPTIONAL]
- * 5. eplsilon: tolerance constant [OPTIONAL]
- * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL] * 
- * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
- * 8. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL]
- * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
- *
- * Output:
- * [1] Filtered/regularized image/volume
- *
- * This function is based on the Matlab's codes and papers by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
- */
-
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, methTV, printswitch, nonneg;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    const mwSize *dim_array2;    
-    float *Input, *InputRef, *Output=NULL, lambda, epsil, eta;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    dim_array2 = mxGetDimensions(prhs[1]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 3) || (nrhs > 9)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Reference(2D/3D), Regularization parameter, iterations number, tolerance, smoothing constant, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
-    
-    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
-    InputRef  = (float *) mxGetData(prhs[1]); /* reference image (2D/3D) */
-    lambda =  (float) mxGetScalar(prhs[2]); /* regularization parameter */
-    iter = 300; /* default iterations number */
-    epsil = 0.0001; /* default tolerance constant */
-    eta = 0.01; /* default smoothing constant */
-    methTV = 0;  /* default isotropic TV penalty */
-    nonneg = 0; /* default nonnegativity switch, off - 0 */
-    printswitch = 0; /*default print is switched, off - 0 */
-    
-        
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    if (mxGetClassID(prhs[1]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    if (number_of_dims == 2) { if ((dimX != dim_array2[0]) || (dimY != dim_array2[1])) mexErrMsgTxt("The input images have different dimensionalities");}
-    if (number_of_dims == 3) { if ((dimX != dim_array2[0]) || (dimY != dim_array2[1]) || (dimZ != dim_array2[2])) mexErrMsgTxt("The input volumes have different dimensionalities");}   
-    
-    
-    if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6) || (nrhs == 7) || (nrhs == 8) || (nrhs == 9))  iter = (int) mxGetScalar(prhs[3]); /* iterations number */
-    if ((nrhs == 5) || (nrhs == 6) || (nrhs == 7) || (nrhs == 8) || (nrhs == 9))  epsil =  (float) mxGetScalar(prhs[4]); /* tolerance constant */
-    if ((nrhs == 6) || (nrhs == 7) || (nrhs == 8) || (nrhs == 9))  {
-    eta =  (float) mxGetScalar(prhs[5]); /* smoothing constant for the gradient of InputRef */
-    }
-    if ((nrhs == 7) || (nrhs == 8) || (nrhs == 9))  {        
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[6]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-        mxFree(penalty_type);
-    }    
-    if ((nrhs == 8) || (nrhs == 9))  {
-        nonneg = (int) mxGetScalar(prhs[7]);
-        if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
-    }
-    if (nrhs == 9)  {
-        printswitch = (int) mxGetScalar(prhs[8]);
-        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
-    }    
-   
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    
-    /* running the function */
-    dTV_FGP_CPU_main(Input, InputRef, Output, lambda, iter, epsil, eta, methTV, nonneg, printswitch, dimX, dimY, dimZ);
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/LLT_ROF.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/LLT_ROF.c
deleted file mode 100644
index ab45446..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/LLT_ROF.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "LLT_ROF_core.h"
-
-/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
-* 
-* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well. 
-* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase 
-* lambdaLLT starting with smaller values. 
-*
-* Input Parameters:
-* 1. U0 - original noise image/volume
-* 2. lambdaROF - ROF-related regularisation parameter
-* 3. lambdaLLT - LLT-related regularisation parameter
-* 4. tau - time-marching step 
-* 5. iter - iterations number (for both models)
-*
-* Output:
-* Filtered/regularised image
-*
-* References: 
-* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
-* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
-*/
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iterationsNumb;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;    
-    float *Input, *Output=NULL, lambdaROF, lambdaLLT, tau;
-    
-    dim_array = mxGetDimensions(prhs[0]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    if ((nrhs < 3) || (nrhs > 5)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter (ROF), Regularisation parameter (LTT), iterations number, time-marching parameter");
-    
-    /*Handling Matlab input data*/
-    Input  = (float *) mxGetData(prhs[0]);
-    lambdaROF =  (float) mxGetScalar(prhs[1]); /* ROF regularization parameter */
-    lambdaLLT =  (float) mxGetScalar(prhs[2]); /* ROF regularization parameter */    
-    iterationsNumb = 250;
-    tau =  0.0025;
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }   
-    if ((nrhs == 4) || (nrhs == 5)) iterationsNumb =  (int) mxGetScalar(prhs[3]); /* iterations number */    
-    if (nrhs == 5) tau =  (float) mxGetScalar(prhs[4]); /* marching step parameter */  
-        
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    /* output arrays*/
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        /* output image/volume */
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));                        
-    }    
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));   
-  
-    LLT_ROF_CPU_main(Input, Output, lambdaROF, lambdaLLT, iterationsNumb, tau, dimX, dimY, dimZ);    
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/NonlDiff.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/NonlDiff.c
deleted file mode 100644
index ec35b8b..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/NonlDiff.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "Diffusion_core.h"
-
-/* C-OMP implementation of linear and nonlinear diffusion with the regularisation model [1] (2D/3D case)
- * The minimisation is performed using explicit scheme.
- *
- * Input Parameters:
- * 1. Noisy image/volume
- * 2. lambda - regularization parameter
- * 3. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
- * 4. Number of iterations, for explicit scheme >= 150 is recommended  [OPTIONAL parameter]
- * 5. tau - time-marching step for explicit scheme [OPTIONAL parameter]
- * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight [OPTIONAL parameter]
- *
- * Output:
- * [1] Regularized image/volume
- *
- * This function is based on the paper by
- * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
- */
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter_numb, penaltytype;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;   
-    
-    float *Input, *Output=NULL, lambda, tau, sigma;
-    
-    dim_array = mxGetDimensions(prhs[0]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    Input  = (float *) mxGetData(prhs[0]);
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    sigma = (float) mxGetScalar(prhs[2]); /* Edge-preserving parameter */
-    iter_numb = 300; /* iterations number */
-    tau = 0.025; /* marching step parameter */
-    penaltytype = 1; /* Huber penalty by default */
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    if ((nrhs < 3) || (nrhs > 6)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant, penalty type - Huber, PM or Tukey");
-    if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6))  iter_numb = (int) mxGetScalar(prhs[3]); /* iterations number */
-    if ((nrhs == 5) || (nrhs == 6))  tau =  (float) mxGetScalar(prhs[4]); /* marching step parameter */
-    if (nrhs == 6)  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[5]); /* Huber, PM or Tukey 'Huber' is the default */
-        if ((strcmp(penalty_type, "Huber") != 0) && (strcmp(penalty_type, "PM") != 0) && (strcmp(penalty_type, "Tukey") != 0)) mexErrMsgTxt("Choose penalty: 'Huber', 'PM' or 'Tukey',");
-        if (strcmp(penalty_type, "Huber") == 0)  penaltytype = 1;  /* enable 'Huber' penalty */
-        if (strcmp(penalty_type, "PM") == 0)  penaltytype = 2;  /* enable Perona-Malik penalty */
-        if (strcmp(penalty_type, "Tukey") == 0)  penaltytype = 3;  /* enable Tikey Biweight penalty */
-        mxFree(penalty_type);
-    }    
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    /* output arrays*/
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        /* output image/volume */
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    
-    Diffusion_CPU_main(Input, Output, lambda, sigma, iter_numb, tau, penaltytype, dimX, dimY, dimZ);
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/NonlDiff_Inp.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/NonlDiff_Inp.c
deleted file mode 100644
index 9833392..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/NonlDiff_Inp.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "Diffusion_Inpaint_core.h"
-
-/* C-OMP implementation of linear and nonlinear diffusion [1,2] for inpainting task (2D/3D case)
- * The minimisation is performed using explicit scheme. 
- *
- * Input Parameters:
- * 1. Image/volume to inpaint
- * 2. Inpainting Mask of the same size as (1) in 'unsigned char' format  (ones mark the region to inpaint, zeros belong to the data)
- * 3. lambda - regularization parameter
- * 4. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
- * 5. Number of iterations, for explicit scheme >= 150 is recommended 
- * 6. tau - time-marching step for explicit scheme
- * 7. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
- *
- * Output:
- * [1] Inpainted image/volume 
- *
- * This function is based on the paper by
- * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
- * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
- */
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter_numb, penaltytype, i, inpaint_elements;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;   
-    const mwSize *dim_array2;   
-    
-    float *Input, *Output=NULL, lambda, tau, sigma;
-    unsigned char *Mask;
-    
-    dim_array = mxGetDimensions(prhs[0]);
-    dim_array2 = mxGetDimensions(prhs[1]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    Input  = (float *) mxGetData(prhs[0]);
-    Mask  = (unsigned char *) mxGetData(prhs[1]); /* MASK */
-    lambda =  (float) mxGetScalar(prhs[2]); /* regularization parameter */
-    sigma = (float) mxGetScalar(prhs[3]); /* Edge-preserving parameter */
-    iter_numb = 300; /* iterations number */
-    tau = 0.025; /* marching step parameter */
-    penaltytype = 1; /* Huber penalty by default */    
-  
-    if ((nrhs < 4) || (nrhs > 7)) mexErrMsgTxt("At least 4 parameters is required, all parameters are: Image(2D/3D), Mask(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant, penalty type - Huber, PM or Tukey");
-    if ((nrhs == 5) || (nrhs == 6) || (nrhs == 7))  iter_numb = (int) mxGetScalar(prhs[4]); /* iterations number */
-    if ((nrhs == 6) || (nrhs == 7))  tau =  (float) mxGetScalar(prhs[5]); /* marching step parameter */
-    if (nrhs == 7)  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[6]); /* Huber, PM or Tukey 'Huber' is the default */
-        if ((strcmp(penalty_type, "Huber") != 0) && (strcmp(penalty_type, "PM") != 0) && (strcmp(penalty_type, "Tukey") != 0)) mexErrMsgTxt("Choose penalty: 'Huber', 'PM' or 'Tukey',");
-        if (strcmp(penalty_type, "Huber") == 0)  penaltytype = 1;  /* enable 'Huber' penalty */
-        if (strcmp(penalty_type, "PM") == 0)  penaltytype = 2;  /* enable Perona-Malik penalty */
-        if (strcmp(penalty_type, "Tukey") == 0)  penaltytype = 3;  /* enable Tikey Biweight penalty */
-        mxFree(penalty_type);
-    }    
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    if (mxGetClassID(prhs[1]) != mxUINT8_CLASS) {mexErrMsgTxt("The mask must be in uint8 precision");}
-    
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    /* output arrays*/
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        /* output image/volume */
-        if ((dimX != dim_array2[0]) || (dimY != dim_array2[1])) mexErrMsgTxt("Input image and the provided mask are of different dimensions!");
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }    
-    if (number_of_dims == 3) {
-        if ((dimX != dim_array2[0]) || (dimY != dim_array2[1]) || (dimZ != dim_array2[2])) mexErrMsgTxt("Input image and the provided mask are of different dimensions!");
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    }    
-    
-    inpaint_elements = 0;
-    for (i=0; i<(int)(dimY*dimX*dimZ); i++) if (Mask[i] == 1) inpaint_elements++;
-    if (inpaint_elements == 0) mexErrMsgTxt("The mask is full of zeros, nothing to inpaint");        
-    Diffusion_Inpaint_CPU_main(Input, Mask, Output, lambda, sigma, iter_numb, tau, penaltytype, dimX, dimY, dimZ);
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/NonlocalMarching_Inpaint.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/NonlocalMarching_Inpaint.c
deleted file mode 100644
index b3f2c98..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/NonlocalMarching_Inpaint.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "NonlocalMarching_Inpaint_core.h"
-
-/* C-OMP implementation of Nonlocal Vertical Marching inpainting method (2D case)
- * The method is heuristic but computationally efficent (especially for larger images).
- * It developed specifically to smoothly inpaint horizontal or inclined missing data regions in sinograms
- * The method WILL not work satisfactory if you have lengthy vertical stripes of missing data
- *
- * Input:
- * 1. 2D image or sinogram [REQUIRED]
- * 2. Mask of the same size as A in 'unsigned char' format  (ones mark the region to inpaint, zeros belong to the data) [REQUIRED]
- * 3. Linear increment to increase searching window size in iterations, values from 1-3 is a good choice [OPTIONAL, default 1]
- * 4. Number of iterations [OPTIONAL, default - calculate based on the mask]
- *
- * Output:
- * 1. Inpainted sinogram  
- * 2. updated mask
- * Reference: TBA
- */
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iterations, SW_increment;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    const mwSize *dim_array2;
-    
-    float *Input, *Output=NULL;
-    unsigned char *Mask, *Mask_upd=NULL;
-    
-    dim_array = mxGetDimensions(prhs[0]);
-    dim_array2 = mxGetDimensions(prhs[1]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    Input  = (float *) mxGetData(prhs[0]);
-    Mask  = (unsigned char *) mxGetData(prhs[1]); /* MASK */    
-    SW_increment = 1;
-    iterations = 0;
-            
-    if ((nrhs < 2) || (nrhs > 4)) mexErrMsgTxt("At least 4 parameters is required, all parameters are: Image(2D/3D), Mask(2D/3D), Linear increment, Iterations number");
-    if ((nrhs == 3) || (nrhs == 4))  SW_increment =  (int) mxGetScalar(prhs[2]); /* linear increment */
-    if ((nrhs == 4))  iterations =  (int) mxGetScalar(prhs[3]); /* iterations number */
-       
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    if (mxGetClassID(prhs[1]) != mxUINT8_CLASS) {mexErrMsgTxt("The mask must be in uint8 precision");}    
-    
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    /* output arrays*/
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        /* output image/volume */
-        if ((dimX != dim_array2[0]) || (dimY != dim_array2[1])) mexErrMsgTxt("Input image and the provided mask are of different dimensions!");
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        Mask_upd = (unsigned char*)mxGetPr(plhs[1] = mxCreateNumericArray(2, dim_array, mxUINT8_CLASS, mxREAL));
-    }    
-    if (number_of_dims == 3) {
-        mexErrMsgTxt("Currently 2D supported only");        
-    }           
-    NonlocalMarching_Inpaint_main(Input, Mask, Output, Mask_upd, SW_increment, iterations, 0, dimX, dimY, dimZ);
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
deleted file mode 100644
index 014c0a0..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd. 
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd. 
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "matrix.h"
-#include "mex.h"
-#include "Nonlocal_TV_core.h"
-
-#define EPS 1.0000e-9
-
-/* Matlab wrapper for C-OMP implementation of non-local regulariser
- * Weights and associated indices must be given as an input.
- * Gauss-Seidel fixed point iteration requires ~ 3 iterations, so the main effort
- * goes in pre-calculation of weights and selection of patches
- *
- *
- * Input Parameters:
- * 1. 2D/3D grayscale image/volume
- * 2. AR_i - indeces of i neighbours
- * 3. AR_j - indeces of j neighbours
- * 4. AR_k - indeces of k neighbours (0 - for 2D case)
- * 5. Weights_ij(k) - associated weights 
- * 6. regularisation parameter
- * 7. iterations number 
- 
- * Output:
- * 1. denoised image/volume 	
- * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
- */
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-{
-    long number_of_dims,  dimX, dimY, dimZ;
-    int IterNumb, NumNeighb = 0;
-    unsigned short *H_i, *H_j, *H_k;
-    const int  *dim_array;
-    const int  *dim_array2;
-    float *A_orig, *Output=NULL, *Weights, lambda;
-    
-    dim_array = mxGetDimensions(prhs[0]);
-    dim_array2 = mxGetDimensions(prhs[1]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    A_orig  = (float *) mxGetData(prhs[0]); /* a 2D image or a set of 2D images (3D stack) */
-    H_i  = (unsigned short *) mxGetData(prhs[1]); /* indeces of i neighbours */
-    H_j  = (unsigned short *) mxGetData(prhs[2]); /* indeces of j neighbours */
-    H_k  = (unsigned short *) mxGetData(prhs[3]); /* indeces of k neighbours */
-    Weights = (float *) mxGetData(prhs[4]); /* weights for patches */
-    lambda = (float) mxGetScalar(prhs[5]); /* regularisation parameter */
-    IterNumb = (int) mxGetScalar(prhs[6]); /* the number of iterations */
- 
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];   
-         
-    /*****2D INPUT *****/
-    if (number_of_dims == 2) {
-        dimZ = 0;   
-        NumNeighb = dim_array2[2];
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));  
-        }
-    /*****3D INPUT *****/
-    /****************************************************/
-    if (number_of_dims == 3) {
-        NumNeighb = dim_array2[3];
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    
-    /* run the main function here */
-    Nonlocal_TV_CPU_main(A_orig, Output, H_i, H_j, H_k, Weights, dimX, dimY, dimZ, NumNeighb, lambda, IterNumb);
-}
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
deleted file mode 100644
index f942539..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC and Diamond Light Source Ltd. 
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- * Copyright 2018 Diamond Light Source Ltd. 
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "matrix.h"
-#include "mex.h"
-#include "PatchSelect_core.h"
-
-/* C-OMP implementation of non-local weight pre-calculation for non-local priors
- * Weights and associated indices are stored into pre-allocated arrays and passed
- * to the regulariser
- *
- *
- * Input Parameters:
- * 1. 2D/3D grayscale image/volume
- * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
- * 3. Similarity window (half-size of the patch window, e.g. 2)
- * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
- * 5. noise-related parameter to calculate non-local weights
- *
- * Output [2D]:
- * 1. AR_i - indeces of i neighbours
- * 2. AR_j - indeces of j neighbours
- * 3. Weights_ij - associated weights
- *
- * Output [3D]:
- * 1. AR_i - indeces of i neighbours
- * 2. AR_j - indeces of j neighbours
- * 3. AR_k - indeces of j neighbours
- * 4. Weights_ijk - associated weights
- */
-/**************************************************/
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-{
-    int number_of_dims,  SearchWindow, SimilarWin, NumNeighb;
-    mwSize dimX, dimY, dimZ;
-    unsigned short *H_i=NULL, *H_j=NULL, *H_k=NULL;
-    const int  *dim_array;
-    float *A, *Weights = NULL, h;
-    int dim_array2[3]; /* for 2D data */
-    int dim_array3[4]; /* for 3D data */
-    
-    dim_array = mxGetDimensions(prhs[0]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    A  = (float *) mxGetData(prhs[0]); /* a 2D or 3D image/volume */
-    SearchWindow = (int) mxGetScalar(prhs[1]);    /* Large Searching window */
-    SimilarWin = (int) mxGetScalar(prhs[2]);    /* Similarity window (patch-search)*/
-    NumNeighb = (int) mxGetScalar(prhs[3]); /* the total number of neighbours to take */
-    h = (float) mxGetScalar(prhs[4]); /* NLM parameter */
-
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    dim_array2[0] = dimX; dim_array2[1] = dimY; dim_array2[2] = NumNeighb;  /* 2D case */
-    dim_array3[0] = dimX; dim_array3[1] = dimY; dim_array3[2] = dimZ; dim_array3[3] = NumNeighb;  /* 3D case */
-    
-    /****************2D INPUT ***************/
-    if (number_of_dims == 2) {
-        dimZ = 0;               
-        H_i = (unsigned short*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array2, mxUINT16_CLASS, mxREAL));
-        H_j = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(3, dim_array2, mxUINT16_CLASS, mxREAL));
-        Weights = (float*)mxGetPr(plhs[2] = mxCreateNumericArray(3, dim_array2, mxSINGLE_CLASS, mxREAL));
-        }
-    /****************3D INPUT ***************/
-    if (number_of_dims == 3) {        
-        H_i = (unsigned short*)mxGetPr(plhs[0] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL));
-        H_j = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL));
-        H_k = (unsigned short*)mxGetPr(plhs[2] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL));
-        Weights = (float*)mxGetPr(plhs[3] = mxCreateNumericArray(4, dim_array3, mxSINGLE_CLASS, mxREAL));        
-    }
-    
-    PatchSelect_CPU_main(A, H_i, H_j, H_k, Weights, (long)(dimX), (long)(dimY), (long)(dimZ), SearchWindow, SimilarWin, NumNeighb, h, 0); 
-    
- }
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/ROF_TV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/ROF_TV.c
deleted file mode 100644
index 55ef2b1..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/ROF_TV.c
+++ /dev/null
@@ -1,77 +0,0 @@
-
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "ROF_TV_core.h"
-
-/* ROF-TV denoising/regularization model [1] (2D/3D case)
- * (MEX wrapper for MATLAB)
- * 
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
- * 3. Number of iterations, for explicit scheme >= 150 is recommended  [REQUIRED]
- * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
- *
- * Output:
- * [1] Regularized image/volume 
- *
- * This function is based on the paper by
- * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
- *
- * D. Kazantsev, 2016-18
- */
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter_numb;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array_i;
-    float *Input, *Output=NULL, lambda, tau;    
-    
-    dim_array_i = mxGetDimensions(prhs[0]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    Input  = (float *) mxGetData(prhs[0]);
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter_numb =  (int) mxGetScalar(prhs[2]); /* iterations number */
-    tau =  (float) mxGetScalar(prhs[3]); /* marching step parameter */  
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    if(nrhs != 4) mexErrMsgTxt("Four inputs reqired: Image(2D,3D), regularization parameter, iterations number,  marching step constant");
-    /*Handling Matlab output data*/
-    dimX = dim_array_i[0]; dimY = dim_array_i[1]; dimZ = dim_array_i[2];        
-    
-    /* output arrays*/
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        /* output image/volume */
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array_i, mxSINGLE_CLASS, mxREAL));          
-    }    
-    if (number_of_dims == 3) {
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array_i, mxSINGLE_CLASS, mxREAL));
-    }
-     
-    TV_ROF_CPU_main(Input, Output, lambda, iter_numb, tau, dimX, dimY, dimZ);    
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/SB_TV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/SB_TV.c
deleted file mode 100644
index 8636322..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/SB_TV.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "SB_TV_core.h"
-
-/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
-*
-* Input Parameters:
-* 1. Noisy image/volume
-* 2. lambda - regularisation parameter
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon - tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-* 6. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
-*
-* Output:
-* 1. Filtered/regularized image
-*
-* This function is based on the Matlab's code and paper by
-* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
-*/
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, methTV, printswitch;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    
-    float *Input, *Output=NULL, lambda, epsil;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), print switch");
-    
-    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter = 100; /* default iterations number */
-    epsil = 0.0001; /* default tolerance constant */
-    methTV = 0;  /* default isotropic TV penalty */
-    printswitch = 0; /*default print is switched, off - 0 */
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
-    if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
-    if ((nrhs == 5) || (nrhs == 6))  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-        mxFree(penalty_type);
-    }
-    if (nrhs == 6)  {
-        printswitch = (int) mxGetScalar(prhs[5]);
-        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
-    }
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    
-    /* running the function */
-    SB_TV_CPU_main(Input, Output, lambda, iter, epsil, methTV, printswitch, dimX, dimY, dimZ);
-}
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c
deleted file mode 100644
index aa4eed4..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/TGV.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "mex.h"
-#include "TGV_core.h"
-
-/* C-OMP implementation of Primal-Dual denoising method for 
- * Total Generilized Variation (TGV)-L2 model [1] (2D/3D)
- *
- * Input Parameters:
- * 1. Noisy image/volume (2D/3D)
- * 2. lambda - regularisation parameter
- * 3. parameter to control the first-order term (alpha1)
- * 4. parameter to control the second-order term (alpha0)
- * 5. Number of Chambolle-Pock (Primal-Dual) iterations
- * 6. Lipshitz constant (default is 12)
- *
- * Output:
- * Filtered/regulariaed image 
- *
- * References:
- * [1] K. Bredies "Total Generalized Variation"
- */
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    
-    float *Input, *Output=NULL, lambda, alpha0, alpha1, L2;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D), Regularisation parameter, alpha0, alpha1, iterations number, Lipshitz Constant");
-    
-    Input  = (float *) mxGetData(prhs[0]); /*noisy image/volume */
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularisation parameter */
-    alpha1 =  1.0f; /* parameter to control the first-order term */ 
-    alpha0 =  0.5f; /* parameter to control the second-order term */
-    iter =  300; /* Iterations number */      
-    L2 =  12.0f; /* Lipshitz constant */
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }   
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6))  alpha1 =  (float) mxGetScalar(prhs[2]); /* parameter to control the first-order term */ 
-    if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6))  alpha0 =  (float) mxGetScalar(prhs[3]);  /* parameter to control the second-order term */
-    if ((nrhs == 5) || (nrhs == 6))  iter =  (int) mxGetScalar(prhs[4]); /* Iterations number */      
-    if (nrhs == 6)  L2 =  (float) mxGetScalar(prhs[5]); /* Lipshitz constant */
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));        
-    }
-    if (number_of_dims == 3) {
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    }       
-    /* running the function */
-    TGV_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY, dimZ);        
-}
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/TNV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/TNV.c
deleted file mode 100644
index acea75d..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/TNV.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "TNV_core.h"
-/*
- * C-OMP implementation of Total Nuclear Variation regularisation model (2D + channels) [1]
- * The code is modified from the implementation by Joan Duran <joan.duran@uib.es> see
- * "denoisingPDHG_ipol.cpp" in Joans Collaborative Total Variation package
- *
- * Input Parameters:
- * 1. Noisy volume of 2D + channel dimension, i.e. 3D volume
- * 2. lambda - regularisation parameter
- * 3. Number of iterations [OPTIONAL parameter]
- * 4. eplsilon - tolerance constant [OPTIONAL parameter]
- * 5. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
- *
- * Output:
- * 1. Filtered/regularized image
- *
- * [1]. Duran, J., Moeller, M., Sbert, C. and Cremers, D., 2016. Collaborative total variation: a general framework for vectorial TV models. SIAM Journal on Imaging Sciences, 9(1), pp.116-151.
- */
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    float *Input, *Output=NULL, lambda, epsil;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 4)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D + channels), Regularisation parameter, Regularization parameter, iterations number, tolerance");
-    
-    Input  = (float *) mxGetData(prhs[0]); /* noisy sequence of channels (2D + channels) */
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter = 1000; /* default iterations number */
-    epsil = 1.00e-05; /* default tolerance constant */
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    if ((nrhs == 3) || (nrhs == 4))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
-    if (nrhs == 4)  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    if (number_of_dims == 2) mexErrMsgTxt("The input must be 3D: [X,Y,Channels]");
-    if (number_of_dims == 3) {
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-        /* running the function */
-        TNV_CPU_main(Input, Output, lambda, iter, epsil, dimX, dimY, dimZ);
-    }
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/TV_energy.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/TV_energy.c
deleted file mode 100644
index d457f46..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/TV_energy.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "utils.h"
-/*
- * Function to calculate TV energy value with respect to the denoising variational problem
- * 
- * Input:
- * 1. Denoised Image/volume
- * 2. Original (noisy) Image/volume
- * 3. lambda - regularisation parameter 
- * 
- * Output:
- * 1. Energy function value
- * 
- */
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, type;
-    
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    float *Input, *Input0, lambda;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs != 4)) mexErrMsgTxt("4 inputs: Two images or volumes of the same size required, estimated and the original (noisy), regularisation parameter, type");
-    
-    Input  = (float *) mxGetData(prhs[0]); /* Denoised Image/volume */
-    Input0  = (float *) mxGetData(prhs[1]); /* Original (noisy) Image/volume */
-    lambda =  (float) mxGetScalar(prhs[2]); /* regularisation parameter */
-    type =  (int) mxGetScalar(prhs[3]); /* type of energy */
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    if (mxGetClassID(prhs[1]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    /*output energy function value */
-    plhs[0] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL);
-    float *funcvalA = (float *) mxGetData(plhs[0]);
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    if (number_of_dims == 2) {
-		TV_energy2D(Input, Input0, funcvalA, lambda, type, dimX, dimY);
-		}
-    if (number_of_dims == 3) {
-        TV_energy3D(Input, Input0, funcvalA, lambda, type, dimX, dimY, dimZ);
-    }
-}
diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/Diffusion_4thO_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/Diffusion_4thO_GPU.cpp
deleted file mode 100644
index 0cc042b..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_GPU/Diffusion_4thO_GPU.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "Diffus_4thO_GPU_core.h"
-
-/* CUDA implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
- * The minimisation is performed using explicit scheme. 
- *
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
- * 3. Edge-preserving parameter (sigma) [REQUIRED]
- * 4. Number of iterations, for explicit scheme >= 150 is recommended [OPTIONAL, default 300]
- * 5. tau - time-marching step for the explicit scheme [OPTIONAL, default 0.015]
- *
- * Output:
- * [1] Regularized image/volume 
- *
- * This function is based on the paper by
- * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
- */
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter_numb;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    float *Input, *Output=NULL, lambda, tau, sigma;
-    
-    dim_array = mxGetDimensions(prhs[0]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    Input  = (float *) mxGetData(prhs[0]);
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    sigma = (float) mxGetScalar(prhs[2]); /* Edge-preserving parameter */
-    iter_numb = 300; /* iterations number */
-    tau = 0.01; /* marching step parameter */
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    if ((nrhs < 3) || (nrhs > 5)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant");
-    if ((nrhs == 4) || (nrhs == 5))  iter_numb = (int) mxGetScalar(prhs[3]); /* iterations number */
-    if (nrhs == 5)  tau =  (float) mxGetScalar(prhs[4]); /* marching step parameter */
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    /* output arrays*/
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        /* output image/volume */
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    
-    Diffus4th_GPU_main(Input, Output, lambda, sigma, iter_numb, tau, dimX, dimY, dimZ);
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_TV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_TV_GPU.cpp
deleted file mode 100644
index c174e75..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_TV_GPU.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "TV_FGP_GPU_core.h"
-
-/* GPU (CUDA) implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
- *
- * Input Parameters:
- * 1. Noisy image/volume
- * 2. lambdaPar - regularization parameter
- * 3. Number of iterations
- * 4. eplsilon: tolerance constant
- * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
- * 6. nonneg: 'nonnegativity (0 is OFF by default)
- * 7. print information: 0 (off) or 1 (on)
- *
- * Output:
- * [1] Filtered/regularized image
- *
- * This function is based on the Matlab's code and paper by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- */
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, methTV, printswitch, nonneg;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    
-    float *Input, *Output=NULL, lambda, epsil;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 7)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
-    
-    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter = 300; /* default iterations number */
-    epsil = 0.0001; /* default tolerance constant */
-    methTV = 0;  /* default isotropic TV penalty */
-    nonneg = 0; /* default nonnegativity switch, off - 0 */
-    printswitch = 0; /*default print is switched, off - 0 */
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6) || (nrhs == 7))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
-    if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6) || (nrhs == 7))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
-    if ((nrhs == 5) || (nrhs == 6) || (nrhs == 7))  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-        mxFree(penalty_type);
-    }
-    if ((nrhs == 6) || (nrhs == 7))  {
-        nonneg = (int) mxGetScalar(prhs[5]);
-        if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
-    }
-    if (nrhs == 7)  {
-        printswitch = (int) mxGetScalar(prhs[6]);
-        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
-    }
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    
-    /* running the function */
-    TV_FGP_GPU_main(Input, Output, lambda, iter, epsil, methTV, nonneg, printswitch, dimX, dimY, dimZ);    
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp
deleted file mode 100644
index 3f5a4b3..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "dTV_FGP_GPU_core.h"
-
-/* CUDA implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
- * which employs structural similarity of the level sets of two images/volumes, see [1,2]
- * The current implementation updates image 1 while image 2 is being fixed.
- *
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
- * 3. lambdaPar - regularization parameter [REQUIRED]
- * 4. Number of iterations [OPTIONAL]
- * 5. eplsilon: tolerance constant [OPTIONAL]
- * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL] * 
- * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
- * 8. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL]
- * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
- *
- * Output:
- * [1] Filtered/regularized image/volume
- *
- * This function is based on the Matlab's codes and papers by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
- */
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, methTV, printswitch, nonneg;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    const mwSize *dim_array2;
-    
-    float *Input, *InputRef, *Output=NULL, lambda, epsil, eta;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    dim_array2 = mxGetDimensions(prhs[1]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 3) || (nrhs > 9)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Reference(2D/3D), Regularization parameter, iterations number, tolerance, smoothing constant, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
-    
-    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
-    InputRef  = (float *) mxGetData(prhs[1]); /* reference image (2D/3D) */
-    lambda =  (float) mxGetScalar(prhs[2]); /* regularization parameter */
-    iter = 300; /* default iterations number */
-    epsil = 0.0001; /* default tolerance constant */
-    eta = 0.01; /* default smoothing constant */
-    methTV = 0;  /* default isotropic TV penalty */
-    nonneg = 0; /* default nonnegativity switch, off - 0 */
-    printswitch = 0; /*default print is switched, off - 0 */
-    
-        
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    if (mxGetClassID(prhs[1]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    if (number_of_dims == 2) { if ((dimX != dim_array2[0]) || (dimY != dim_array2[1])) mexErrMsgTxt("The input images have different dimensionalities");}
-    if (number_of_dims == 3) { if ((dimX != dim_array2[0]) || (dimY != dim_array2[1]) || (dimZ != dim_array2[2])) mexErrMsgTxt("The input volumes have different dimensionalities");}   
-    
-    
-    if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6) || (nrhs == 7) || (nrhs == 8) || (nrhs == 9))  iter = (int) mxGetScalar(prhs[3]); /* iterations number */
-    if ((nrhs == 5) || (nrhs == 6) || (nrhs == 7) || (nrhs == 8) || (nrhs == 9))  epsil =  (float) mxGetScalar(prhs[4]); /* tolerance constant */
-    if ((nrhs == 6) || (nrhs == 7) || (nrhs == 8) || (nrhs == 9))  {
-    eta =  (float) mxGetScalar(prhs[5]); /* smoothing constant for the gradient of InputRef */
-    }
-    if ((nrhs == 7) || (nrhs == 8) || (nrhs == 9))  {        
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[6]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-        mxFree(penalty_type);
-    }    
-    if ((nrhs == 8) || (nrhs == 9))  {
-        nonneg = (int) mxGetScalar(prhs[7]);
-        if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
-    }
-    if (nrhs == 9)  {
-        printswitch = (int) mxGetScalar(prhs[8]);
-        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
-    }    
-   
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    
-    /* running the function */
-    dTV_FGP_GPU_main(Input, InputRef, Output, lambda, iter, epsil, eta, methTV, nonneg, printswitch, dimX, dimY, dimZ);
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp
deleted file mode 100644
index e8da4ce..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "LLT_ROF_GPU_core.h"
-
-/* CUDA implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
-* 
-* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well. 
-* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase 
-* lambdaLLT starting with smaller values. 
-*
-* Input Parameters:
-* 1. U0 - original noise image/volume
-* 2. lambdaROF - ROF-related regularisation parameter
-* 3. lambdaLLT - LLT-related regularisation parameter
-* 4. tau - time-marching step 
-* 5. iter - iterations number (for both models)
-*
-* Output:
-* Filtered/regularised image
-*
-* References: 
-* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
-* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
-*/
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iterationsNumb;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    
-    float *Input, *Output=NULL, lambdaROF, lambdaLLT, tau;
-    
-    dim_array = mxGetDimensions(prhs[0]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    if ((nrhs < 3) || (nrhs > 5)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter (ROF), Regularisation parameter (LTT), iterations number, time-marching parameter");
-    
-    /*Handling Matlab input data*/
-    Input  = (float *) mxGetData(prhs[0]);
-    lambdaROF =  (float) mxGetScalar(prhs[1]); /* ROF regularization parameter */
-    lambdaLLT =  (float) mxGetScalar(prhs[2]); /* ROF regularization parameter */    
-    iterationsNumb = 250;
-    tau =  0.0025;
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }   
-    if ((nrhs == 4) || (nrhs == 5)) iterationsNumb =  (int) mxGetScalar(prhs[3]); /* iterations number */    
-    if (nrhs == 5) tau =  (float) mxGetScalar(prhs[4]); /* marching step parameter */  
-        
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    /* output arrays*/
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        /* output image/volume */
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));                        
-    }    
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));   
-  
-    LLT_ROF_GPU_main(Input, Output, lambdaROF, lambdaLLT, iterationsNumb, tau, dimX, dimY, dimZ);    
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/NonlDiff_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/NonlDiff_GPU.cpp
deleted file mode 100644
index 1cd0cdc..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_GPU/NonlDiff_GPU.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include <stdio.h>
-#include <string.h>
-#include "NonlDiff_GPU_core.h"
-
-/* CUDA implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
- * The minimisation is performed using explicit scheme. 
- *
- * Input Parameters:
- * 1. Noisy image/volume 
- * 2. lambda - regularization parameter
- * 3. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
- * 4. Number of iterations, for explicit scheme >= 150 is recommended 
- * 5. tau - time-marching step for explicit scheme
- * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
- *
- * Output:
- * [1] Regularized image/volume 
- *
- * This function is based on the paper by
- * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
- * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
- */
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter_numb, penaltytype;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    
-    float *Input, *Output=NULL, lambda, tau, sigma;
-    
-    dim_array = mxGetDimensions(prhs[0]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    Input  = (float *) mxGetData(prhs[0]);
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    sigma = (float) mxGetScalar(prhs[2]); /* Edge-preserving parameter */
-    iter_numb = 300; /* iterations number */
-    tau = 0.025; /* marching step parameter */
-    penaltytype = 1; /* Huber penalty by default */
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    if ((nrhs < 3) || (nrhs > 6)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant, penalty type - Huber, PM or Tukey");
-    if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6))  iter_numb = (int) mxGetScalar(prhs[3]); /* iterations number */
-    if ((nrhs == 5) || (nrhs == 6))  tau =  (float) mxGetScalar(prhs[4]); /* marching step parameter */
-    if (nrhs == 6)  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[5]); /* Huber, PM or Tukey 'Huber' is the default */
-        if ((strcmp(penalty_type, "Huber") != 0) && (strcmp(penalty_type, "PM") != 0) && (strcmp(penalty_type, "Tukey") != 0)) mexErrMsgTxt("Choose penalty: 'Huber', 'PM' or 'Tukey',");
-        if (strcmp(penalty_type, "Huber") == 0)  penaltytype = 1;  /* enable 'Huber' penalty */
-        if (strcmp(penalty_type, "PM") == 0)  penaltytype = 2;  /* enable Perona-Malik penalty */
-        if (strcmp(penalty_type, "Tukey") == 0)  penaltytype = 3;  /* enable Tikey Biweight penalty */
-        mxFree(penalty_type);
-    }    
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    /* output arrays*/
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        /* output image/volume */
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    
-    NonlDiff_GPU_main(Input, Output, lambda, sigma, iter_numb, tau, penaltytype, dimX, dimY, dimZ);
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/ROF_TV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/ROF_TV_GPU.cpp
deleted file mode 100644
index bd01d55..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_GPU/ROF_TV_GPU.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "TV_ROF_GPU_core.h"
-
-/* ROF-TV denoising/regularization model [1] (2D/3D case)
- * (MEX wrapper for MATLAB)
- * 
- * Input Parameters:
- * 1. Noisy image/volume [REQUIRED]
- * 2. lambda - regularization parameter [REQUIRED]
- * 3. Number of iterations, for explicit scheme >= 150 is recommended  [REQUIRED]
- * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
- *
- * Output:
- * [1] Regularized image/volume 
- *
- * This function is based on the paper by
- * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
- *
- * D. Kazantsev, 2016-18
- */
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter_numb;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    
-    float *Input, *Output=NULL, lambda, tau;
-    
-    dim_array = mxGetDimensions(prhs[0]);
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    Input  = (float *) mxGetData(prhs[0]);
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter_numb =  (int) mxGetScalar(prhs[2]); /* iterations number */
-    tau =  (float) mxGetScalar(prhs[3]); /* marching step parameter */  
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    if(nrhs != 4) mexErrMsgTxt("Four inputs reqired: Image(2D,3D), regularization parameter, iterations number,  marching step constant");
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    /* output arrays*/
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        /* output image/volume */
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));                        
-    }    
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    
-    TV_ROF_GPU_main(Input, Output, lambda, iter_numb, tau, dimX, dimY, dimZ);    
-}
\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp
deleted file mode 100644
index 9d1328f..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "TV_SB_GPU_core.h"
-
-/* CUDA mex-file for implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
-*
-* Input Parameters:
-* 1. Noisy image/volume
-* 2. lambda - regularisation parameter
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon - tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-* 6. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
-*
-* Output:
-* 1. Filtered/regularized image
-*
-* This function is based on the Matlab's code and paper by
-* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
-*/
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, methTV, printswitch;
-    mwSize dimX, dimY, dimZ;
-    const mwSize *dim_array;
-    
-    float *Input, *Output=NULL, lambda, epsil;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), print switch");
-    
-    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter = 100; /* default iterations number */
-    epsil = 0.0001; /* default tolerance constant */
-    methTV = 0;  /* default isotropic TV penalty */
-    printswitch = 0; /*default print is switched, off - 0 */
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
-    if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
-    if ((nrhs == 5) || (nrhs == 6))  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-        mxFree(penalty_type);
-    }
-    if (nrhs == 6)  {
-        printswitch = (int) mxGetScalar(prhs[5]);
-        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
-    }
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    
-    /* running the function */
-    TV_SB_GPU_main(Input, Output, lambda, iter, epsil, methTV, printswitch, dimX, dimY, dimZ);
-}
diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp
deleted file mode 100644
index edb551d..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
-This work is part of the Core Imaging Library developed by
-Visual Analytics and Imaging System Group of the Science Technology
-Facilities Council, STFC
-
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "mex.h"
-#include "TGV_GPU_core.h"
-
-/* CUDA implementation of Primal-Dual denoising method for 
- * Total Generilized Variation (TGV)-L2 model [1] (2D case only)
- *
- * Input Parameters:
- * 1. Noisy image (2D) (required)
- * 2. lambda - regularisation parameter (required)
- * 3. parameter to control the first-order term (alpha1) (default - 1)
- * 4. parameter to control the second-order term (alpha0) (default - 0.5)
- * 5. Number of Chambolle-Pock (Primal-Dual) iterations (default is 300)
- * 6. Lipshitz constant (default is 12)
- *
- * Output:
- * Filtered/regulariaed image 
- *
- * References:
- * [1] K. Bredies "Total Generalized Variation"
- */
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter;
-    mwSize dimX, dimY;
-    const mwSize *dim_array;
-    float *Input, *Output=NULL, lambda, alpha0, alpha1, L2;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D), Regularisation parameter, alpha0, alpha1, iterations number, Lipshitz Constant");
-    
-    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D) */
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularisation parameter */
-    alpha1 =  1.0f; /* parameter to control the first-order term */ 
-    alpha0 =  0.5f; /* parameter to control the second-order term */
-    iter =  300; /* Iterations number */      
-    L2 =  12.0f; /* Lipshitz constant */
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }   
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6))  alpha1 =  (float) mxGetScalar(prhs[2]); /* parameter to control the first-order term */ 
-    if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6))  alpha0 =  (float) mxGetScalar(prhs[3]);  /* parameter to control the second-order term */
-    if ((nrhs == 5) || (nrhs == 6))  iter =  (int) mxGetScalar(prhs[4]); /* Iterations number */      
-    if (nrhs == 6)  L2 =  (float) mxGetScalar(prhs[5]); /* Lipshitz constant */
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1];
-    
-    if (number_of_dims == 2) {
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        /* running the function */
-        TGV_GPU_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY);        
-    }
-    if (number_of_dims == 3) {mexErrMsgTxt("Only 2D images accepted");}       
-}
diff --git a/Wrappers/Matlab/supp/RMSE.m b/Wrappers/Matlab/supp/RMSE.m
deleted file mode 100644
index 002f776..0000000
--- a/Wrappers/Matlab/supp/RMSE.m
+++ /dev/null
@@ -1,7 +0,0 @@
-function err = RMSE(signal1, signal2)
-%RMSE Root Mean Squared Error
-
-err = sum((signal1 - signal2).^2)/length(signal1);  % MSE
-err = sqrt(err);                                    % RMSE
-
-end
\ No newline at end of file
diff --git a/Wrappers/Matlab/supp/my_red_yellowMAP.mat b/Wrappers/Matlab/supp/my_red_yellowMAP.mat
deleted file mode 100644
index c2a5b87..0000000
Binary files a/Wrappers/Matlab/supp/my_red_yellowMAP.mat and /dev/null differ
diff --git a/Wrappers/Python/CMakeLists.txt b/Wrappers/Python/CMakeLists.txt
deleted file mode 100644
index c2ef855..0000000
--- a/Wrappers/Python/CMakeLists.txt
+++ /dev/null
@@ -1,141 +0,0 @@
-#   Copyright 2018 Edoardo Pasca
-cmake_minimum_required (VERSION 3.0)
-
-project(regulariserPython)
-#https://stackoverflow.com/questions/13298504/using-cmake-with-setup-py
-
-# The version number.
-
-#set (CIL_VERSION $ENV{CIL_VERSION} CACHE INTERNAL "Core Imaging Library version" FORCE)
-
-# conda orchestrated build
-message("CIL_VERSION: ${CIL_VERSION}")
-#include (GenerateExportHeader)
-
-find_package(PythonInterp REQUIRED)
-if (PYTHONINTERP_FOUND)
-  message ("Current Python " ${PYTHON_VERSION_STRING} " found " ${PYTHON_EXECUTABLE})
-endif()
-
-	
-## Build the regularisers package as a library
-message("Creating Regularisers as shared library")
-
-message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
-
-set(CMAKE_BUILD_TYPE "Release")
-
-if(WIN32)
-  set (FLAGS "/DWIN32 /EHsc /openmp /DCCPiCore_EXPORTS")
-  set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRT.lib")
-  
-  set (EXTRA_LIBRARIES)
-		
-  message("library lib: ${LIBRARY_LIB}")
-  
-elseif(UNIX)
-   set (FLAGS "-fopenmp -O2 -funsigned-char -Wall  -Wl,--no-undefined  -DCCPiReconstructionIterative_EXPORTS -std=c++0x")  
-   set (EXTRA_LIBRARIES 
-		"gomp"
-		)
-endif()
-
-# GPU regularisers
-if (BUILD_CUDA)
-    find_package(CUDA)
-    if (CUDA_FOUND)
-      message("CUDA FOUND")
-      set (SETUP_GPU_WRAPPERS "extra_libraries += ['cilregcuda']\n\
-setup( \n\
-        name='ccpi', \n\
-        description='CCPi Core Imaging Library - Image regularisers GPU',\n\
-        version=cil_version,\n\
-        cmdclass = {'build_ext': build_ext},\n\
-        ext_modules = [Extension('ccpi.filters.gpu_regularisers',\n\
-                                  sources=[ \n\
-                                          os.path.join('.' , 'src', 'gpu_regularisers.pyx' ),\n\
-                                            ],\n\
-                                 include_dirs=extra_include_dirs, \n\
-                                 library_dirs=extra_library_dirs, \n\
-                                 extra_compile_args=extra_compile_args, \n\
-                                 libraries=extra_libraries ), \n\
-        ],\n\
-        zip_safe = False,	\n\
-        packages = {'ccpi','ccpi.filters'},\n\
-    )")
-    else()
-      message("CUDA NOT FOUND")
-      set(SETUP_GPU_WRAPPERS "#CUDA NOT FOUND")
-    endif()
-endif()
-configure_file("${CMAKE_CURRENT_SOURCE_DIR}/setup-regularisers.py.in" "${CMAKE_CURRENT_BINARY_DIR}/setup-regularisers.py")
-
-
-find_package(PythonInterp)
-find_package(PythonLibs)
-if (PYTHONINTERP_FOUND)
-  message(STATUS "Found PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}")
-  message(STATUS "Python version ${PYTHON_VERSION_STRING}")
-endif()
-if (PYTHONLIBS_FOUND)
-  message(STATUS "Found PYTHON_INCLUDE_DIRS=${PYTHON_INCLUDE_DIRS}")
-  message(STATUS "Found PYTHON_LIBRARIES=${PYTHON_LIBRARIES}")
-endif()
-
-if (PYTHONINTERP_FOUND)
-    message("Python found " ${PYTHON_EXECUTABLE})
-    set(SETUP_PY_IN "${CMAKE_CURRENT_SOURCE_DIR}/setup-regularisers.py.in")
-    set(SETUP_PY    "${CMAKE_CURRENT_BINARY_DIR}/setup-regularisers.py")
-    #set(DEPS        "${CMAKE_CURRENT_SOURCE_DIR}/module/__init__.py")
-    set (DEPS       "${CMAKE_BINARY_DIR}/Core/")
-    set(OUTPUT      "${CMAKE_CURRENT_BINARY_DIR}/build/timestamp")
-
-    configure_file(${SETUP_PY_IN} ${SETUP_PY})
-
-    message("Core binary dir " ${CMAKE_BINARY_DIR}/Core/${CMAKE_BUILD_TYPE})
-    
-    if (CONDA_BUILD)
-      add_custom_command(OUTPUT ${OUTPUT}
-                       COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src
-                       COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/ccpi ${CMAKE_CURRENT_BINARY_DIR}/ccpi
-                       COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
-                                                       PREFIX=${CMAKE_SOURCE_DIR}/Core 
-                                                       LIBRARY_INC=${CMAKE_SOURCE_DIR}/Core 
-                                                       LIBRARY_LIB=${CMAKE_BINARY_DIR}/Core
-                                                       ${PYTHON_EXECUTABLE} ${SETUP_PY} install
-                       COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
-                       DEPENDS cilreg)
-
-    else()
-      if (WIN32)
-        add_custom_command(OUTPUT ${OUTPUT}
-                       COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src
-                       COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/ccpi ${CMAKE_CURRENT_BINARY_DIR}/ccpi
-                       COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
-                                                       PREFIX=${CMAKE_SOURCE_DIR}/Core 
-                                                       LIBRARY_INC=${CMAKE_SOURCE_DIR}/Core 
-                                                       LIBRARY_LIB=${CMAKE_BINARY_DIR}/Core/${CMAKE_BUILD_TYPE}
-                                                       ${PYTHON_EXECUTABLE} ${SETUP_PY} build_ext --inplace
-                       COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
-                       DEPENDS cilreg)
-      else()
-        add_custom_command(OUTPUT ${OUTPUT}
-                       COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src
-                       COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/ccpi ${CMAKE_CURRENT_BINARY_DIR}/ccpi
-                       COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
-                                                       PREFIX=${CMAKE_SOURCE_DIR}/Core 
-                                                       LIBRARY_INC=${CMAKE_SOURCE_DIR}/Core 
-                                                       LIBRARY_LIB=${CMAKE_BINARY_DIR}/Core
-                                                       ${PYTHON_EXECUTABLE} ${SETUP_PY} build_ext --inplace
-                       COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
-                       DEPENDS cilreg)
-      endif()
-      install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ccpi 
-              DESTINATION ${PYTHON_DEST})
-    endif()
-    
-    
-    add_custom_target(PythonWrapper ALL DEPENDS ${OUTPUT})
-
-    #install(CODE "execute_process(COMMAND ${PYTHON} ${SETUP_PY} install)")
-endif()
diff --git a/Wrappers/Python/ccpi/__init__.py b/Wrappers/Python/ccpi/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/Wrappers/Python/ccpi/filters/__init__.py b/Wrappers/Python/ccpi/filters/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/Wrappers/Python/ccpi/filters/regularisers.py b/Wrappers/Python/ccpi/filters/regularisers.py
deleted file mode 100644
index 588ea32..0000000
--- a/Wrappers/Python/ccpi/filters/regularisers.py
+++ /dev/null
@@ -1,214 +0,0 @@
-"""
-script which assigns a proper device core function based on a flag ('cpu' or 'gpu')
-"""
-
-from ccpi.filters.cpu_regularisers import TV_ROF_CPU, TV_FGP_CPU, TV_SB_CPU, dTV_FGP_CPU, TNV_CPU, NDF_CPU, Diff4th_CPU, TGV_CPU, LLT_ROF_CPU, PATCHSEL_CPU, NLTV_CPU
-try:
-    from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, TV_SB_GPU, dTV_FGP_GPU, NDF_GPU, Diff4th_GPU, TGV_GPU, LLT_ROF_GPU, PATCHSEL_GPU
-    gpu_enabled = True
-except ImportError:
-    gpu_enabled = False    
-from ccpi.filters.cpu_regularisers import NDF_INPAINT_CPU, NVM_INPAINT_CPU
-
-def ROF_TV(inputData, regularisation_parameter, iterations,
-                     time_marching_parameter,device='cpu'):
-    if device == 'cpu':
-        return TV_ROF_CPU(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     time_marching_parameter)
-    elif device == 'gpu' and gpu_enabled:
-        return TV_ROF_GPU(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     time_marching_parameter)
-    else:
-        if not gpu_enabled and device == 'gpu':
-            raise ValueError ('GPU is not available')
-        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
-                         .format(device))
-
-def FGP_TV(inputData, regularisation_parameter,iterations,
-                     tolerance_param, methodTV, nonneg, printM, device='cpu'):
-    if device == 'cpu':
-        return TV_FGP_CPU(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     methodTV,
-                     nonneg,
-                     printM)
-    elif device == 'gpu' and gpu_enabled:
-        return TV_FGP_GPU(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     methodTV,
-                     nonneg,
-                     printM)
-    else:
-        if not gpu_enabled and device == 'gpu':
-            raise ValueError ('GPU is not available')
-        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
-                         .format(device))
-def SB_TV(inputData, regularisation_parameter, iterations,
-                     tolerance_param, methodTV, printM, device='cpu'):
-    if device == 'cpu':
-        return TV_SB_CPU(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     methodTV,
-                     printM)
-    elif device == 'gpu' and gpu_enabled:
-        return TV_SB_GPU(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     methodTV,
-                     printM)
-    else:
-        if not gpu_enabled and device == 'gpu':
-            raise ValueError ('GPU is not available')
-        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
-                         .format(device))
-def FGP_dTV(inputData, refdata, regularisation_parameter, iterations,
-                     tolerance_param, eta_const, methodTV, nonneg, printM, device='cpu'):
-    if device == 'cpu':
-        return dTV_FGP_CPU(inputData,
-                     refdata,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     eta_const,
-                     methodTV,
-                     nonneg,
-                     printM)
-    elif device == 'gpu' and gpu_enabled:
-        return dTV_FGP_GPU(inputData,
-                     refdata,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     eta_const,
-                     methodTV,
-                     nonneg,
-                     printM)
-    else:
-        if not gpu_enabled and device == 'gpu':
-            raise ValueError ('GPU is not available')
-        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
-                         .format(device))
-def TNV(inputData, regularisation_parameter, iterations, tolerance_param):
-        return TNV_CPU(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param)
-def NDF(inputData, regularisation_parameter, edge_parameter, iterations,
-                     time_marching_parameter, penalty_type, device='cpu'):
-    if device == 'cpu':
-        return NDF_CPU(inputData,
-                     regularisation_parameter,
-                     edge_parameter,
-                     iterations, 
-                     time_marching_parameter,
-                     penalty_type)
-    elif device == 'gpu' and gpu_enabled:
-        return NDF_GPU(inputData,
-                     regularisation_parameter,
-                     edge_parameter,
-                     iterations, 
-                     time_marching_parameter,
-                     penalty_type)
-    else:
-        if not gpu_enabled and device == 'gpu':
-    	    raise ValueError ('GPU is not available')
-        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
-                         .format(device))
-def Diff4th(inputData, regularisation_parameter, edge_parameter, iterations,
-                     time_marching_parameter, device='cpu'):
-    if device == 'cpu':
-        return Diff4th_CPU(inputData,
-                     regularisation_parameter,
-                     edge_parameter,
-                     iterations, 
-                     time_marching_parameter)
-    elif device == 'gpu' and gpu_enabled:
-        return Diff4th_GPU(inputData,
-                     regularisation_parameter,
-                     edge_parameter,
-                     iterations, 
-                     time_marching_parameter)
-    else:
-        if not gpu_enabled and device == 'gpu':
-            raise ValueError ('GPU is not available')
-        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
-                         .format(device))
-        
-def PatchSelect(inputData, searchwindow, patchwindow, neighbours, edge_parameter, device='cpu'):
-    if device == 'cpu':
-        return PATCHSEL_CPU(inputData,
-                     searchwindow,
-                     patchwindow,
-                     neighbours, 
-                     edge_parameter)
-    elif device == 'gpu' and gpu_enabled:
-        return PATCHSEL_GPU(inputData,
-                     searchwindow,
-                     patchwindow,
-                     neighbours, 
-                     edge_parameter)
-    else:
-        if not gpu_enabled and device == 'gpu':
-            raise ValueError ('GPU is not available')
-        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
-                         .format(device))
-
-def NLTV(inputData, H_i, H_j, H_k, Weights, regularisation_parameter, iterations):
-    return NLTV_CPU(inputData,
-                     H_i,
-                     H_j,
-                     H_k, 
-                     Weights,
-                     regularisation_parameter,
-                     iterations)
-
-def TGV(inputData, regularisation_parameter, alpha1, alpha0, iterations,
-                     LipshitzConst, device='cpu'):
-    if device == 'cpu':
-        return TGV_CPU(inputData, 
-					regularisation_parameter, 
-					alpha1, 
-					alpha0, 
-					iterations,
-                    LipshitzConst)
-    elif device == 'gpu' and gpu_enabled:
-        return TGV_GPU(inputData, 
-					regularisation_parameter, 
-					alpha1, 
-					alpha0, 
-					iterations,
-                    LipshitzConst)
-    else:
-        if not gpu_enabled and device == 'gpu':
-            raise ValueError ('GPU is not available')
-        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
-                         .format(device))
-def LLT_ROF(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations,
-                     time_marching_parameter, device='cpu'):
-    if device == 'cpu':
-        return LLT_ROF_CPU(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
-    elif device == 'gpu' and gpu_enabled:
-        return LLT_ROF_GPU(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
-    else:
-        if not gpu_enabled and device == 'gpu':
-            raise ValueError ('GPU is not available')
-        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
-                         .format(device))
-def NDF_INP(inputData, maskData, regularisation_parameter, edge_parameter, iterations,
-                     time_marching_parameter, penalty_type):
-        return NDF_INPAINT_CPU(inputData, maskData, regularisation_parameter, 
-        edge_parameter, iterations, time_marching_parameter, penalty_type)
-        
-def NVM_INP(inputData, maskData, SW_increment, iterations):
-        return NVM_INPAINT_CPU(inputData, maskData, SW_increment, iterations)
diff --git a/Wrappers/Python/conda-recipe/bld.bat b/Wrappers/Python/conda-recipe/bld.bat
deleted file mode 100644
index 6c84355..0000000
--- a/Wrappers/Python/conda-recipe/bld.bat
+++ /dev/null
@@ -1,20 +0,0 @@
-IF NOT DEFINED CIL_VERSION (
-ECHO CIL_VERSION Not Defined.
-exit 1
-)
-
-mkdir "%SRC_DIR%\ccpi"
-ROBOCOPY /E "%RECIPE_DIR%\..\.." "%SRC_DIR%\ccpi"
-ROBOCOPY /E "%RECIPE_DIR%\..\..\..\Core" "%SRC_DIR%\Core"
-::cd %SRC_DIR%\ccpi\Python
-cd %SRC_DIR%
-
-:: issue cmake to create setup.py
-cmake -G "NMake Makefiles" %RECIPE_DIR%\..\..\..\ -DBUILD_PYTHON_WRAPPERS=ON -DCONDA_BUILD=ON -DBUILD_CUDA=OFF -DCMAKE_BUILD_TYPE="Release" -DLIBRARY_LIB="%CONDA_PREFIX%\lib" -DLIBRARY_INC="%CONDA_PREFIX%" -DCMAKE_INSTALL_PREFIX="%PREFIX%\Library" 
-
-::%PYTHON% setup-regularisers.py build_ext
-::if errorlevel 1 exit 1
-::%PYTHON% setup-regularisers.py install
-::if errorlevel 1 exit 1
-nmake install
-if errorlevel 1 exit 1
\ No newline at end of file
diff --git a/Wrappers/Python/conda-recipe/build.sh b/Wrappers/Python/conda-recipe/build.sh
deleted file mode 100644
index 39c0f2c..0000000
--- a/Wrappers/Python/conda-recipe/build.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-
-mkdir "$SRC_DIR/ccpi"
-cp -rv "$RECIPE_DIR/../.." "$SRC_DIR/ccpi"
-cp -rv "$RECIPE_DIR/../../../Core" "$SRC_DIR/Core"
-
-cd $SRC_DIR
-##cuda=off
-
-cmake -G "Unix Makefiles" $RECIPE_DIR/../../../ -DBUILD_PYTHON_WRAPPER=ON -DCONDA_BUILD=ON -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE="Release" -DLIBRARY_LIB=$CONDA_PREFIX/lib -DLIBRARY_INC=$CONDA_PREFIX -DCMAKE_INSTALL_PREFIX=$PREFIX
-
-
-make install
-
-#$PYTHON setup-regularisers.py build_ext
-#$PYTHON setup-regularisers.py install
-
-
diff --git a/Wrappers/Python/conda-recipe/conda_build_config.yaml b/Wrappers/Python/conda-recipe/conda_build_config.yaml
deleted file mode 100644
index fbe82dc..0000000
--- a/Wrappers/Python/conda-recipe/conda_build_config.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-python:
-  - 2.7 # [not win]
-  - 3.5
-  - 3.6
-#  - 3.7
-numpy:
-  - 1.12
-  - 1.14
-  - 1.15
diff --git a/Wrappers/Python/conda-recipe/meta.yaml b/Wrappers/Python/conda-recipe/meta.yaml
deleted file mode 100644
index 7435b2b..0000000
--- a/Wrappers/Python/conda-recipe/meta.yaml
+++ /dev/null
@@ -1,40 +0,0 @@
-package:
-  name: ccpi-regulariser
-  version: {{CIL_VERSION}}
-  
-build:
-  preserve_egg_dir: False
-  number: 0
-  script_env:
-    - CIL_VERSION
-  
-test:
-  files:
-    - lena_gray_512.tif
-  requires:
-    - pillow=4.1.1
-
-requirements:
-  build:
-    - python
-    - numpy {{ numpy }}
-    - setuptools
-    - cython
-    - vc 14 # [win and py36] 
-    - vc 14 # [win and py35] 
-    - vc 9  # [win and py27]
-    - cmake 
-
-  run:
-    - {{ pin_compatible('numpy', max_pin='x.x') }}
-    - python
-    - numpy
-    - vc 14 # [win and py36] 
-    - vc 14 # [win and py35] 
-    - vc 9  # [win and py27]
-    - libgcc-ng
-
-about:
-  home: http://www.ccpi.ac.uk
-  license:  BSD license
-  summary: 'CCPi Core Imaging Library Quantification Toolbox'
diff --git a/Wrappers/Python/conda-recipe/run_test.py b/Wrappers/Python/conda-recipe/run_test.py
deleted file mode 100755
index 21f3216..0000000
--- a/Wrappers/Python/conda-recipe/run_test.py
+++ /dev/null
@@ -1,819 +0,0 @@
-import unittest
-import numpy as np
-import os
-import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
-from PIL import Image
-
-class TiffReader(object):
-    def imread(self, filename):
-        return np.asarray(Image.open(filename))
-###############################################################################
-def printParametersToString(pars):
-        txt = r''
-        for key, value in pars.items():
-            if key== 'algorithm' :
-                txt += "{0} = {1}".format(key, value.__name__)
-            elif key == 'input':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            elif key == 'refdata':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            else:
-                txt += "{0} = {1}".format(key, value)
-            txt += '\n'
-        return txt
-def nrmse(im1, im2):
-    rmse = np.sqrt(np.sum((im2 - im1) ** 2) / float(im1.size))
-    max_val = max(np.max(im1), np.max(im2))
-    min_val = min(np.min(im1), np.min(im2))
-    return 1 - (rmse / (max_val - min_val))
-    
-def rmse(im1, im2):
-    rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size))
-    return rmse
-###############################################################################
-
-class TestRegularisers(unittest.TestCase):
-    
-
-    def test_ROF_TV_CPU_vs_GPU(self):
-        #print ("tomas debug test function")
-        print(__name__)
-        filename = os.path.join("lena_gray_512.tif")
-        plt = TiffReader()
-        # read image
-        Im = plt.imread(filename)                     
-        Im = np.asarray(Im, dtype='float32')
-        
-        Im = Im/255
-        perc = 0.05
-        u0 = Im + np.random.normal(loc = 0 ,
-                                          scale = perc * Im , 
-                                          size = np.shape(Im))
-        u_ref = Im + np.random.normal(loc = 0 ,
-                                          scale = 0.01 * Im , 
-                                          size = np.shape(Im))
-        
-        # map the u0 u0->u0>0
-        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-        u0 = u0.astype('float32')
-        u_ref = u_ref.astype('float32')
-        
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("____________ROF-TV bench___________________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        
-        # set parameters
-        pars = {'algorithm': ROF_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04,\
-        'number_of_iterations': 2500,\
-        'time_marching_parameter': 0.00002
-        }
-        print ("#############ROF TV CPU####################")
-        start_time = timeit.default_timer()
-        rof_cpu = ROF_TV(pars['input'],
-                     pars['regularisation_parameter'],
-                     pars['number_of_iterations'],
-                     pars['time_marching_parameter'],'cpu')
-        rms = rmse(Im, rof_cpu)
-        pars['rmse'] = rms
-        
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        print ("##############ROF TV GPU##################")
-        start_time = timeit.default_timer()
-        try:
-            rof_gpu = ROF_TV(pars['input'], 
-                             pars['regularisation_parameter'],
-                             pars['number_of_iterations'], 
-                             pars['time_marching_parameter'],'gpu')
-        except ValueError as ve:
-            self.skipTest("Results not comparable. GPU computing error.")
-
-        rms = rmse(Im, rof_gpu)
-        pars['rmse'] = rms
-        pars['algorithm'] = ROF_TV
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        print ("--------Compare the results--------")
-        tolerance = 1e-04
-        diff_im = np.zeros(np.shape(rof_cpu))
-        diff_im = abs(rof_cpu - rof_gpu)
-        diff_im[diff_im > tolerance] = 1
-        self.assertLessEqual(diff_im.sum() , 1)
-        
-    def test_FGP_TV_CPU_vs_GPU(self):
-        print(__name__)
-        filename = os.path.join("lena_gray_512.tif")
-        plt = TiffReader()
-        # read image
-        Im = plt.imread(filename)                     
-        Im = np.asarray(Im, dtype='float32')
-        
-        Im = Im/255
-        perc = 0.05
-        u0 = Im + np.random.normal(loc = 0 ,
-                                          scale = perc * Im , 
-                                          size = np.shape(Im))
-        u_ref = Im + np.random.normal(loc = 0 ,
-                                          scale = 0.01 * Im , 
-                                          size = np.shape(Im))
-        
-        # map the u0 u0->u0>0
-        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-        u0 = u0.astype('float32')
-        u_ref = u_ref.astype('float32')
-        
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("____________FGP-TV bench___________________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        
-        
-        # set parameters
-        pars = {'algorithm' : FGP_TV, \
-                'input' : u0,\
-                'regularisation_parameter':0.04, \
-                'number_of_iterations' :1200 ,\
-                'tolerance_constant':0.00001,\
-                'methodTV': 0 ,\
-                'nonneg': 0 ,\
-                'printingOut': 0 
-                }
-                
-        print ("#############FGP TV CPU####################")
-        start_time = timeit.default_timer()
-        fgp_cpu = FGP_TV(pars['input'], 
-                      pars['regularisation_parameter'],
-                      pars['number_of_iterations'],
-                      pars['tolerance_constant'], 
-                      pars['methodTV'],
-                      pars['nonneg'],
-                      pars['printingOut'],'cpu')  
-                     
-                     
-        rms = rmse(Im, fgp_cpu)
-        pars['rmse'] = rms
-        
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        
-        print ("##############FGP TV GPU##################")
-        start_time = timeit.default_timer()
-        try:
-            fgp_gpu = FGP_TV(pars['input'], 
-                      pars['regularisation_parameter'],
-                      pars['number_of_iterations'],
-                      pars['tolerance_constant'], 
-                      pars['methodTV'],
-                      pars['nonneg'],
-                      pars['printingOut'],'gpu')
-
-        except ValueError as ve:
-            self.skipTest("Results not comparable. GPU computing error.")
-
-        rms = rmse(Im, fgp_gpu)
-        pars['rmse'] = rms
-        pars['algorithm'] = FGP_TV
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        
-        print ("--------Compare the results--------")
-        tolerance = 1e-05
-        diff_im = np.zeros(np.shape(fgp_cpu))
-        diff_im = abs(fgp_cpu - fgp_gpu)
-        diff_im[diff_im > tolerance] = 1
-
-        self.assertLessEqual(diff_im.sum() , 1)
-
-    def test_SB_TV_CPU_vs_GPU(self):
-        print(__name__)
-        filename = os.path.join("lena_gray_512.tif")
-        plt = TiffReader()
-        # read image
-        Im = plt.imread(filename)                     
-        Im = np.asarray(Im, dtype='float32')
-        
-        Im = Im/255
-        perc = 0.05
-        u0 = Im + np.random.normal(loc = 0 ,
-                                          scale = perc * Im , 
-                                          size = np.shape(Im))
-        u_ref = Im + np.random.normal(loc = 0 ,
-                                          scale = 0.01 * Im , 
-                                          size = np.shape(Im))
-        
-        # map the u0 u0->u0>0
-        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-        u0 = u0.astype('float32')
-        u_ref = u_ref.astype('float32')
-        
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("____________SB-TV bench___________________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        
-        
-        # set parameters
-        pars = {'algorithm' : SB_TV, \
-                'input' : u0,\
-                'regularisation_parameter':0.04, \
-                'number_of_iterations' :150 ,\
-                'tolerance_constant':1e-05,\
-                'methodTV': 0 ,\
-                'printingOut': 0 
-                }
-                
-        print ("#############SB-TV CPU####################")
-        start_time = timeit.default_timer()
-        sb_cpu = SB_TV(pars['input'], 
-                      pars['regularisation_parameter'],
-                      pars['number_of_iterations'],
-                      pars['tolerance_constant'], 
-                      pars['methodTV'],
-                      pars['printingOut'],'cpu')  
-                     
-                     
-        rms = rmse(Im, sb_cpu)
-        pars['rmse'] = rms
-        
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        
-        print ("##############SB TV GPU##################")
-        start_time = timeit.default_timer()
-        try:
-            
-            sb_gpu = SB_TV(pars['input'], 
-                      pars['regularisation_parameter'],
-                      pars['number_of_iterations'],
-                      pars['tolerance_constant'], 
-                      pars['methodTV'],
-                      pars['printingOut'],'gpu')
-
-        except ValueError as ve:
-            self.skipTest("Results not comparable. GPU computing error.")
-
-        rms = rmse(Im, sb_gpu)
-        pars['rmse'] = rms
-        pars['algorithm'] = SB_TV
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        print ("--------Compare the results--------")
-        tolerance = 1e-05
-        diff_im = np.zeros(np.shape(sb_cpu))
-        diff_im = abs(sb_cpu - sb_gpu)
-        diff_im[diff_im > tolerance] = 1
-        self.assertLessEqual(diff_im.sum(), 1)
-
-    def test_TGV_CPU_vs_GPU(self):
-        print(__name__)
-        filename = os.path.join("lena_gray_512.tif")
-        plt = TiffReader()
-        # read image
-        Im = plt.imread(filename)                     
-        Im = np.asarray(Im, dtype='float32')
-        
-        Im = Im/255
-        perc = 0.05
-        u0 = Im + np.random.normal(loc = 0 ,
-                                          scale = perc * Im , 
-                                          size = np.shape(Im))
-        u_ref = Im + np.random.normal(loc = 0 ,
-                                          scale = 0.01 * Im , 
-                                          size = np.shape(Im))
-        
-        # map the u0 u0->u0>0
-        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-        u0 = u0.astype('float32')
-        u_ref = u_ref.astype('float32')
-        
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("____________TGV bench___________________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        
-        
-        # set parameters
-        pars = {'algorithm' : TGV, \
-                'input' : u0,\
-                'regularisation_parameter':0.04, \
-                'alpha1':1.0,\
-                'alpha0':2.0,\
-                'number_of_iterations' :250 ,\
-                'LipshitzConstant' :12 ,\
-                }
-                
-        print ("#############TGV CPU####################")
-        start_time = timeit.default_timer()
-        tgv_cpu = TGV(pars['input'], 
-                      pars['regularisation_parameter'],
-                      pars['alpha1'],
-                      pars['alpha0'],
-                      pars['number_of_iterations'],
-                      pars['LipshitzConstant'],'cpu')
-                     
-        rms = rmse(Im, tgv_cpu)
-        pars['rmse'] = rms
-        
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        
-        print ("##############TGV GPU##################")
-        start_time = timeit.default_timer()
-        try:
-            tgv_gpu = TGV(pars['input'], 
-                      pars['regularisation_parameter'],
-                      pars['alpha1'],
-                      pars['alpha0'],
-                      pars['number_of_iterations'],
-                      pars['LipshitzConstant'],'gpu')
-
-        except ValueError as ve:
-            self.skipTest("Results not comparable. GPU computing error.")
-
-        rms = rmse(Im, tgv_gpu)
-        pars['rmse'] = rms
-        pars['algorithm'] = TGV
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        print ("--------Compare the results--------")
-        tolerance = 1e-05
-        diff_im = np.zeros(np.shape(tgv_gpu))
-        diff_im = abs(tgv_cpu - tgv_gpu)
-        diff_im[diff_im > tolerance] = 1
-        self.assertLessEqual(diff_im.sum() , 1)
-
-    def test_LLT_ROF_CPU_vs_GPU(self):
-        print(__name__)
-        filename = os.path.join("lena_gray_512.tif")
-        plt = TiffReader()
-        # read image
-        Im = plt.imread(filename)                     
-        Im = np.asarray(Im, dtype='float32')
-        
-        Im = Im/255
-        perc = 0.05
-        u0 = Im + np.random.normal(loc = 0 ,
-                                          scale = perc * Im , 
-                                          size = np.shape(Im))
-        u_ref = Im + np.random.normal(loc = 0 ,
-                                          scale = 0.01 * Im , 
-                                          size = np.shape(Im))
-        
-        # map the u0 u0->u0>0
-        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-        u0 = u0.astype('float32')
-        u_ref = u_ref.astype('float32')
-        
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("____________LLT-ROF bench___________________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        
-        
-        # set parameters
-        pars = {'algorithm' : LLT_ROF, \
-                'input' : u0,\
-                'regularisation_parameterROF':0.04, \
-                'regularisation_parameterLLT':0.01, \
-                'number_of_iterations' :1000 ,\
-                'time_marching_parameter' :0.0001 ,\
-                }
-                
-        print ("#############LLT- ROF CPU####################")
-        start_time = timeit.default_timer()
-        lltrof_cpu = LLT_ROF(pars['input'], 
-                      pars['regularisation_parameterROF'],
-                      pars['regularisation_parameterLLT'],
-                      pars['number_of_iterations'],
-                      pars['time_marching_parameter'],'cpu')
-        
-        rms = rmse(Im, lltrof_cpu)
-        pars['rmse'] = rms
-        
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        print ("#############LLT- ROF GPU####################")
-        start_time = timeit.default_timer()
-        try:
-            lltrof_gpu = LLT_ROF(pars['input'], 
-                      pars['regularisation_parameterROF'],
-                      pars['regularisation_parameterLLT'],
-                      pars['number_of_iterations'],
-                      pars['time_marching_parameter'],'gpu')
-        
-        except ValueError as ve:
-            self.skipTest("Results not comparable. GPU computing error.")
-
-        rms = rmse(Im, lltrof_gpu)
-        pars['rmse'] = rms
-        pars['algorithm'] = LLT_ROF
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        print ("--------Compare the results--------")
-        tolerance = 1e-04
-        diff_im = np.zeros(np.shape(lltrof_gpu))
-        diff_im = abs(lltrof_cpu - lltrof_gpu)
-        diff_im[diff_im > tolerance] = 1
-        self.assertLessEqual(diff_im.sum(), 1)
-
-    def test_NDF_CPU_vs_GPU(self):
-        print(__name__)
-        filename = os.path.join("lena_gray_512.tif")
-        plt = TiffReader()
-        # read image
-        Im = plt.imread(filename)                     
-        Im = np.asarray(Im, dtype='float32')
-        
-        Im = Im/255
-        perc = 0.05
-        u0 = Im + np.random.normal(loc = 0 ,
-                                          scale = perc * Im , 
-                                          size = np.shape(Im))
-        u_ref = Im + np.random.normal(loc = 0 ,
-                                          scale = 0.01 * Im , 
-                                          size = np.shape(Im))
-        
-        # map the u0 u0->u0>0
-        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-        u0 = u0.astype('float32')
-        u_ref = u_ref.astype('float32')
-        
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("_______________NDF bench___________________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        
-        
-        # set parameters
-        pars = {'algorithm' : NDF, \
-                'input' : u0,\
-                'regularisation_parameter':0.06, \
-                'edge_parameter':0.04,\
-                'number_of_iterations' :1000 ,\
-                'time_marching_parameter':0.025,\
-                'penalty_type':  1
-                }
-                
-        print ("#############NDF CPU####################")
-        start_time = timeit.default_timer()
-        ndf_cpu = NDF(pars['input'], 
-                      pars['regularisation_parameter'],
-                      pars['edge_parameter'], 
-                      pars['number_of_iterations'],
-                      pars['time_marching_parameter'], 
-                      pars['penalty_type'],'cpu')
-                     
-        rms = rmse(Im, ndf_cpu)
-        pars['rmse'] = rms
-        
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        
-        print ("##############NDF GPU##################")
-        start_time = timeit.default_timer()
-        try:
-            ndf_gpu = NDF(pars['input'], 
-                      pars['regularisation_parameter'],
-                      pars['edge_parameter'], 
-                      pars['number_of_iterations'],
-                      pars['time_marching_parameter'], 
-                      pars['penalty_type'],'gpu')
-                     
-        except ValueError as ve:
-            self.skipTest("Results not comparable. GPU computing error.")
-        rms = rmse(Im, ndf_gpu)
-        pars['rmse'] = rms
-        pars['algorithm'] = NDF
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        print ("--------Compare the results--------")
-        tolerance = 1e-05
-        diff_im = np.zeros(np.shape(ndf_cpu))
-        diff_im = abs(ndf_cpu - ndf_gpu)
-        diff_im[diff_im > tolerance] = 1
-        self.assertLessEqual(diff_im.sum(), 1)
-
-        
-    def test_Diff4th_CPU_vs_GPU(self):
-        filename = os.path.join("lena_gray_512.tif")
-        plt = TiffReader()
-        # read image
-        Im = plt.imread(filename)                     
-        Im = np.asarray(Im, dtype='float32')
-        
-        Im = Im/255
-        perc = 0.05
-        u0 = Im + np.random.normal(loc = 0 ,
-                                          scale = perc * Im , 
-                                          size = np.shape(Im))
-        u_ref = Im + np.random.normal(loc = 0 ,
-                                          scale = 0.01 * Im , 
-                                          size = np.shape(Im))
-        
-        # map the u0 u0->u0>0
-        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-        u0 = u0.astype('float32')
-        u_ref = u_ref.astype('float32')
-        
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("___Anisotropic Diffusion 4th Order (2D)____")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        
-        # set parameters
-        pars = {'algorithm' : Diff4th, \
-        'input' : u0,\
-        'regularisation_parameter':3.5, \
-        'edge_parameter':0.02,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.001
-        }
-        
-        print ("#############Diff4th CPU####################")
-        start_time = timeit.default_timer()
-        diff4th_cpu = Diff4th(pars['input'], 
-                      pars['regularisation_parameter'],
-                      pars['edge_parameter'], 
-                      pars['number_of_iterations'],
-                      pars['time_marching_parameter'],'cpu')
-                     
-        rms = rmse(Im, diff4th_cpu)
-        pars['rmse'] = rms
-
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        print ("##############Diff4th GPU##################")
-        start_time = timeit.default_timer()
-        try:
-            diff4th_gpu = Diff4th(pars['input'], 
-                      pars['regularisation_parameter'],
-                      pars['edge_parameter'], 
-                      pars['number_of_iterations'],
-                      pars['time_marching_parameter'], 'gpu')
-                     
-        except ValueError as ve:
-            self.skipTest("Results not comparable. GPU computing error.")
-        rms = rmse(Im, diff4th_gpu)
-        pars['rmse'] = rms
-        pars['algorithm'] = Diff4th
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        print ("--------Compare the results--------")
-        tolerance = 1e-05
-        diff_im = np.zeros(np.shape(diff4th_cpu))
-        diff_im = abs(diff4th_cpu - diff4th_gpu)
-        diff_im[diff_im > tolerance] = 1
-        self.assertLessEqual(diff_im.sum() , 1)
-
-    def test_FDGdTV_CPU_vs_GPU(self):
-        filename = os.path.join("lena_gray_512.tif")
-        plt = TiffReader()
-        # read image
-        Im = plt.imread(filename)                     
-        Im = np.asarray(Im, dtype='float32')
-        
-        Im = Im/255
-        perc = 0.05
-        u0 = Im + np.random.normal(loc = 0 ,
-                                          scale = perc * Im , 
-                                          size = np.shape(Im))
-        u_ref = Im + np.random.normal(loc = 0 ,
-                                          scale = 0.01 * Im , 
-                                          size = np.shape(Im))
-        
-        # map the u0 u0->u0>0
-        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-        u0 = u0.astype('float32')
-        u_ref = u_ref.astype('float32')
-        
-
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("____________FGP-dTV bench___________________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        
-        # set parameters
-        pars = {'algorithm' : FGP_dTV, \
-                'input' : u0,\
-                'refdata' : u_ref,\
-                'regularisation_parameter':0.04, \
-                'number_of_iterations' :1000 ,\
-                'tolerance_constant':1e-07,\
-                'eta_const':0.2,\
-                'methodTV': 0 ,\
-                'nonneg': 0 ,\
-                'printingOut': 0 
-                }
-                
-        print ("#############FGP dTV CPU####################")
-        start_time = timeit.default_timer()
-        fgp_dtv_cpu = FGP_dTV(pars['input'], 
-                      pars['refdata'], 
-                      pars['regularisation_parameter'],
-                      pars['number_of_iterations'],
-                      pars['tolerance_constant'], 
-                      pars['eta_const'], 
-                      pars['methodTV'],
-                      pars['nonneg'],
-                      pars['printingOut'],'cpu')
-                     
-                     
-        rms = rmse(Im, fgp_dtv_cpu)
-        pars['rmse'] = rms
-        
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        print ("##############FGP dTV GPU##################")
-        start_time = timeit.default_timer()
-        try:
-            fgp_dtv_gpu = FGP_dTV(pars['input'], 
-                      pars['refdata'], 
-                      pars['regularisation_parameter'],
-                      pars['number_of_iterations'],
-                      pars['tolerance_constant'], 
-                      pars['eta_const'], 
-                      pars['methodTV'],
-                      pars['nonneg'],
-                      pars['printingOut'],'gpu')
-        except ValueError as ve:
-            self.skipTest("Results not comparable. GPU computing error.")
-        rms = rmse(Im, fgp_dtv_gpu)
-        pars['rmse'] = rms
-        pars['algorithm'] = FGP_dTV
-        txtstr = printParametersToString(pars)
-        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-        print (txtstr)
-        print ("--------Compare the results--------")
-        tolerance = 1e-05
-        diff_im = np.zeros(np.shape(fgp_dtv_cpu))
-        diff_im = abs(fgp_dtv_cpu - fgp_dtv_gpu)
-        diff_im[diff_im > tolerance] = 1
-        self.assertLessEqual(diff_im.sum(), 1)
-
-    def test_cpu_ROF_TV(self):
-        #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy")
-        
-        filename = os.path.join("lena_gray_512.tif")
-
-        plt = TiffReader()
-        # read image
-        Im = plt.imread(filename)                     
-        Im = np.asarray(Im, dtype='float32')
-        Im = Im/255
-        
-        """
-        # read noiseless image
-        Im = plt.imread(filename)
-        Im = np.asarray(Im, dtype='float32')
-        """
-        tolerance = 1e-05
-        rms_rof_exp = 8.313131464999238e-05 #expected value for ROF model
-
-        # set parameters for ROF-TV
-        pars_rof_tv = {'algorithm': ROF_TV, \
-                            'input' : Im,\
-                            'regularisation_parameter':0.04,\
-                            'number_of_iterations': 50,\
-                            'time_marching_parameter': 0.00001
-                            }
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("_________testing ROF-TV (2D, CPU)__________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        rof_cpu = ROF_TV(pars_rof_tv['input'],
-             pars_rof_tv['regularisation_parameter'],
-             pars_rof_tv['number_of_iterations'],
-             pars_rof_tv['time_marching_parameter'],'cpu')
-        rms_rof = rmse(Im, rof_cpu)
-        
-        # now compare obtained rms with the expected value
-        self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance)
-    def test_cpu_FGP_TV(self):
-        #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy")
-        
-        filename = os.path.join("lena_gray_512.tif")
-
-        plt = TiffReader()
-        # read image
-        Im = plt.imread(filename)                     
-        Im = np.asarray(Im, dtype='float32')
-        Im = Im/255
-        """
-        # read noiseless image
-        Im = plt.imread(filename)
-        Im = np.asarray(Im, dtype='float32')
-        """
-        tolerance = 1e-05
-        rms_fgp_exp = 0.019152347 #expected value for FGP model
-        
-        pars_fgp_tv = {'algorithm' : FGP_TV, \
-                            'input' : Im,\
-                            'regularisation_parameter':0.04, \
-                            'number_of_iterations' :50 ,\
-                            'tolerance_constant':1e-06,\
-                            'methodTV': 0 ,\
-                            'nonneg': 0 ,\
-                            'printingOut': 0 
-                            }
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("_________testing FGP-TV (2D, CPU)__________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        fgp_cpu = FGP_TV(pars_fgp_tv['input'], 
-              pars_fgp_tv['regularisation_parameter'],
-              pars_fgp_tv['number_of_iterations'],
-              pars_fgp_tv['tolerance_constant'], 
-              pars_fgp_tv['methodTV'],
-              pars_fgp_tv['nonneg'],
-              pars_fgp_tv['printingOut'],'cpu')  
-        rms_fgp = rmse(Im, fgp_cpu)
-        # now compare obtained rms with the expected value
-        self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance)
-
-    def test_gpu_ROF(self):
-        #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy")
-        filename = os.path.join("lena_gray_512.tif")
-
-        plt = TiffReader()
-        # read image
-        Im = plt.imread(filename)
-        Im = np.asarray(Im, dtype='float32')
-        Im = Im/255
-        
-        tolerance = 1e-05
-        rms_rof_exp = 8.313131464999238e-05 #expected value for ROF model
-        
-        # set parameters for ROF-TV
-        pars_rof_tv = {'algorithm': ROF_TV, \
-                            'input' : Im,\
-                            'regularisation_parameter':0.04,\
-                            'number_of_iterations': 50,\
-                            'time_marching_parameter': 0.00001
-                            }
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("_________testing ROF-TV (2D, GPU)__________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        try:
-            rof_gpu = ROF_TV(pars_rof_tv['input'],
-             pars_rof_tv['regularisation_parameter'],
-             pars_rof_tv['number_of_iterations'],
-             pars_rof_tv['time_marching_parameter'],'gpu')
-        except ValueError as ve:
-            self.skipTest("Results not comparable. GPU computing error.")
-
-        rms_rof = rmse(Im, rof_gpu)
-        # now compare obtained rms with the expected value
-        self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance)
-    
-    def test_gpu_FGP(self):
-        #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy")
-        filename = os.path.join("lena_gray_512.tif")
-
-        plt = TiffReader()
-        # read image
-        Im = plt.imread(filename)                     
-        Im = np.asarray(Im, dtype='float32')
-        Im = Im/255
-        tolerance = 1e-05
-        
-        rms_fgp_exp = 0.019152347 #expected value for FGP model
-        
-        # set parameters for FGP-TV
-        pars_fgp_tv = {'algorithm' : FGP_TV, \
-                            'input' : Im,\
-                            'regularisation_parameter':0.04, \
-                            'number_of_iterations' :50 ,\
-                            'tolerance_constant':1e-06,\
-                            'methodTV': 0 ,\
-                            'nonneg': 0 ,\
-                            'printingOut': 0 
-                            }
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        print ("_________testing FGP-TV (2D, GPU)__________")
-        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-        try:
-            fgp_gpu = FGP_TV(pars_fgp_tv['input'], 
-              pars_fgp_tv['regularisation_parameter'],
-              pars_fgp_tv['number_of_iterations'],
-              pars_fgp_tv['tolerance_constant'], 
-              pars_fgp_tv['methodTV'],
-              pars_fgp_tv['nonneg'],
-              pars_fgp_tv['printingOut'],'gpu')  
-        except ValueError as ve:
-            self.skipTest("Results not comparable. GPU computing error.")
-        rms_fgp = rmse(Im, fgp_gpu)
-        # now compare obtained rms with the expected value
-
-        self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance)
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/Wrappers/Python/demos/demo_cpu_inpainters.py b/Wrappers/Python/demos/demo_cpu_inpainters.py
deleted file mode 100644
index 3b4191b..0000000
--- a/Wrappers/Python/demos/demo_cpu_inpainters.py
+++ /dev/null
@@ -1,192 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Demonstration of CPU inpainters
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import timeit
-from scipy import io
-from ccpi.filters.regularisers import NDF_INP, NVM_INP
-from qualitymetrics import rmse
-###############################################################################
-def printParametersToString(pars):
-        txt = r''
-        for key, value in pars.items():
-            if key== 'algorithm' :
-                txt += "{0} = {1}".format(key, value.__name__)
-            elif key == 'input':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            elif key == 'maskData':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            else:
-                txt += "{0} = {1}".format(key, value)
-            txt += '\n'
-        return txt
-###############################################################################
-
-# read sinogram and the mask
-filename = os.path.join(".." , ".." , ".." , "data" ,"SinoInpaint.mat")
-sino = io.loadmat(filename)
-sino_full = sino.get('Sinogram')
-Mask = sino.get('Mask')
-[angles_dim,detectors_dim] = sino_full.shape
-sino_full = sino_full/np.max(sino_full)
-#apply mask to sinogram
-sino_cut = sino_full*(1-Mask)
-#sino_cut_new = np.zeros((angles_dim,detectors_dim),'float32')
-#sino_cut_new = sino_cut.copy(order='c')
-#sino_cut_new[:] = sino_cut[:]
-sino_cut_new = np.ascontiguousarray(sino_cut, dtype=np.float32);
-#mask = np.zeros((angles_dim,detectors_dim),'uint8')
-#mask =Mask.copy(order='c')
-#mask[:] = Mask[:]
-mask = np.ascontiguousarray(Mask, dtype=np.uint8);
-
-plt.figure(1)
-plt.subplot(121)
-plt.imshow(sino_cut_new,vmin=0.0, vmax=1)
-plt.title('Missing Data sinogram')
-plt.subplot(122)
-plt.imshow(mask)
-plt.title('Mask')
-plt.show()
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Inpainting using linear diffusion (2D)__")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure(2)
-plt.suptitle('Performance of linear inpainting using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Missing data sinogram')
-imgplot = plt.imshow(sino_cut_new,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF_INP, \
-        'input' : sino_cut_new,\
-        'maskData' : mask,\
-        'regularisation_parameter':5000,\
-        'edge_parameter':0,\
-        'number_of_iterations' :5000 ,\
-        'time_marching_parameter':0.000075,\
-        'penalty_type':0
-        }
-        
-start_time = timeit.default_timer()
-ndf_inp_linear = NDF_INP(pars['input'],
-              pars['maskData'],
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'])
-             
-rms = rmse(sino_full, ndf_inp_linear)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_inp_linear, cmap="gray")
-plt.title('{}'.format('Linear diffusion inpainting results'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_Inpainting using nonlinear diffusion (2D)_")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure(3)
-plt.suptitle('Performance of nonlinear diffusion inpainting using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Missing data sinogram')
-imgplot = plt.imshow(sino_cut_new,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF_INP, \
-        'input' : sino_cut_new,\
-        'maskData' : mask,\
-        'regularisation_parameter':80,\
-        'edge_parameter':0.00009,\
-        'number_of_iterations' :1500 ,\
-        'time_marching_parameter':0.000008,\
-        'penalty_type':1
-        }
-        
-start_time = timeit.default_timer()
-ndf_inp_nonlinear = NDF_INP(pars['input'],
-              pars['maskData'],
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'])
-             
-rms = rmse(sino_full, ndf_inp_nonlinear)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_inp_nonlinear, cmap="gray")
-plt.title('{}'.format('Nonlinear diffusion inpainting results'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("Inpainting using nonlocal vertical marching")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure(4)
-plt.suptitle('Performance of NVM inpainting using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Missing data sinogram')
-imgplot = plt.imshow(sino_cut,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NVM_INP, \
-        'input' : sino_cut_new,\
-        'maskData' : mask,\
-        'SW_increment': 1,\
-        'number_of_iterations' : 150
-        }
-        
-start_time = timeit.default_timer()
-(nvm_inp, mask_upd) = NVM_INP(pars['input'],
-              pars['maskData'],
-              pars['SW_increment'],
-              pars['number_of_iterations'])
-             
-rms = rmse(sino_full, nvm_inp)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(nvm_inp, cmap="gray")
-plt.title('{}'.format('Nonlocal Vertical Marching inpainting results'))
-#%%
diff --git a/Wrappers/Python/demos/demo_cpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_regularisers.py
deleted file mode 100644
index e6befa9..0000000
--- a/Wrappers/Python/demos/demo_cpu_regularisers.py
+++ /dev/null
@@ -1,572 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Feb 22 11:39:43 2018
-
-Demonstration of CPU regularisers 
-
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, TNV, NDF, Diff4th
-from ccpi.filters.regularisers import PatchSelect, NLTV
-from qualitymetrics import rmse
-###############################################################################
-def printParametersToString(pars):
-        txt = r''
-        for key, value in pars.items():
-            if key== 'algorithm' :
-                txt += "{0} = {1}".format(key, value.__name__)
-            elif key == 'input':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            elif key == 'refdata':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            else:
-                txt += "{0} = {1}".format(key, value)
-            txt += '\n'
-        return txt
-###############################################################################
-#%%
-filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-
-# read image
-Im = plt.imread(filename)
-Im = np.asarray(Im, dtype='float32')
-
-Im = Im/255.0
-perc = 0.05
-u0 = Im + np.random.normal(loc = 0 ,
-                                  scale = perc * Im , 
-                                  size = np.shape(Im))
-u_ref = Im + np.random.normal(loc = 0 ,
-                                  scale = 0.01 * Im , 
-                                  size = np.shape(Im))
-(N,M) = np.shape(u0)
-# map the u0 u0->u0>0
-# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-u0 = u0.astype('float32')
-u_ref = u_ref.astype('float32')
-
-# change dims to check that modules work with non-squared images
-"""
-M = M-100
-u_ref2 = np.zeros([N,M],dtype='float32')
-u_ref2[:,0:M] = u_ref[:,0:M]
-u_ref = u_ref2
-del u_ref2
-
-u02 = np.zeros([N,M],dtype='float32')
-u02[:,0:M] = u0[:,0:M]
-u0 = u02
-del u02
-
-Im2 = np.zeros([N,M],dtype='float32')
-Im2[:,0:M] = Im[:,0:M]
-Im = Im2
-del Im2
-"""
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________ROF-TV (2D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of ROF-TV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm': ROF_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04,\
-        'number_of_iterations': 1200,\
-        'time_marching_parameter': 0.0025        
-        }
-print ("#############ROF TV CPU####################")
-start_time = timeit.default_timer()
-rof_cpu = ROF_TV(pars['input'],
-             pars['regularisation_parameter'],
-             pars['number_of_iterations'],
-             pars['time_marching_parameter'],'cpu')
-rms = rmse(Im, rof_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(rof_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________FGP-TV (2D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of FGP-TV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :2000 ,\
-        'tolerance_constant':1e-06,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############FGP TV CPU####################")
-start_time = timeit.default_timer()
-fgp_cpu = FGP_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'cpu')  
-             
-             
-rms = rmse(Im, fgp_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________SB-TV (2D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of SB-TV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : SB_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :150 ,\
-        'tolerance_constant':1e-06,\
-        'methodTV': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############SB TV CPU####################")
-start_time = timeit.default_timer()
-sb_cpu = SB_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['printingOut'],'cpu')  
-             
-             
-rms = rmse(Im, sb_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(sb_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-#%%
-
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_____Total Generalised Variation (2D)______")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of TGV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : TGV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'alpha1':1.0,\
-        'alpha0':2.0,\
-        'number_of_iterations' :1350 ,\
-        'LipshitzConstant' :12 ,\
-        }
-        
-print ("#############TGV CPU####################")
-start_time = timeit.default_timer()
-tgv_cpu = TGV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['alpha1'],
-              pars['alpha0'],
-              pars['number_of_iterations'],
-              pars['LipshitzConstant'],'cpu')
-             
-             
-rms = rmse(Im, tgv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tgv_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("______________LLT- ROF (2D)________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of LLT-ROF regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : LLT_ROF, \
-        'input' : u0,\
-        'regularisation_parameterROF':0.04, \
-        'regularisation_parameterLLT':0.01, \
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter' :0.0025 ,\
-        }
-        
-print ("#############LLT- ROF CPU####################")
-start_time = timeit.default_timer()
-lltrof_cpu = LLT_ROF(pars['input'], 
-              pars['regularisation_parameterROF'],
-              pars['regularisation_parameterLLT'],
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'cpu')
-
-rms = rmse(Im, lltrof_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(lltrof_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-
-
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("________________NDF (2D)___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of NDF regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF, \
-        'input' : u0,\
-        'regularisation_parameter':0.025, \
-        'edge_parameter':0.015,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.025,\
-        'penalty_type':1
-        }
-        
-print ("#############NDF CPU################")
-start_time = timeit.default_timer()
-ndf_cpu = NDF(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'],'cpu')  
-             
-rms = rmse(Im, ndf_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Anisotropic Diffusion 4th Order (2D)____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of Diff4th regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : Diff4th, \
-        'input' : u0,\
-        'regularisation_parameter':3.5, \
-        'edge_parameter':0.02,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.0015
-        }
-        
-print ("#############Diff4th CPU################")
-start_time = timeit.default_timer()
-diff4_cpu = Diff4th(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'cpu')
-             
-rms = rmse(Im, diff4_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(diff4_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Nonlocal patches pre-calculation____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-start_time = timeit.default_timer()
-# set parameters
-pars = {'algorithm' : PatchSelect, \
-        'input' : u0,\
-        'searchwindow': 7, \
-        'patchwindow': 2,\
-        'neighbours' : 15 ,\
-        'edge_parameter':0.18}
-
-H_i, H_j, Weights = PatchSelect(pars['input'], 
-              pars['searchwindow'],
-              pars['patchwindow'], 
-              pars['neighbours'],
-              pars['edge_parameter'],'cpu')
-              
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-"""
-plt.figure()
-plt.imshow(Weights[0,:,:],cmap="gray",interpolation="nearest",vmin=0, vmax=1)
-plt.show()
-"""
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Nonlocal Total Variation penalty____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of NLTV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-pars2 = {'algorithm' : NLTV, \
-        'input' : u0,\
-        'H_i': H_i, \
-        'H_j': H_j,\
-        'H_k' : 0,\
-        'Weights' : Weights,\
-        'regularisation_parameter': 0.04,\
-        'iterations': 3
-        }
-start_time = timeit.default_timer()
-nltv_cpu = NLTV(pars2['input'], 
-              pars2['H_i'],
-              pars2['H_j'], 
-              pars2['H_k'],
-              pars2['Weights'],
-              pars2['regularisation_parameter'],
-              pars2['iterations'])
-
-rms = rmse(Im, nltv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(nltv_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_____________FGP-dTV (2D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of FGP-dTV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_dTV, \
-        'input' : u0,\
-        'refdata' : u_ref,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :2000 ,\
-        'tolerance_constant':1e-06,\
-        'eta_const':0.2,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############FGP dTV CPU####################")
-start_time = timeit.default_timer()
-fgp_dtv_cpu = FGP_dTV(pars['input'], 
-              pars['refdata'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['eta_const'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'cpu')
-             
-rms = rmse(Im, fgp_dtv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_dtv_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("__________Total nuclear Variation__________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of TNV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-channelsNo = 5
-noisyVol = np.zeros((channelsNo,N,M),dtype='float32')
-idealVol = np.zeros((channelsNo,N,M),dtype='float32')
-
-for i in range (channelsNo):
-    noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
-    idealVol[i,:,:] = Im
-
-# set parameters
-pars = {'algorithm' : TNV, \
-        'input' : noisyVol,\
-        'regularisation_parameter': 0.04, \
-        'number_of_iterations' : 200 ,\
-        'tolerance_constant':1e-05
-        }
-        
-print ("#############TNV CPU#################")
-start_time = timeit.default_timer()
-tnv_cpu = TNV(pars['input'],           
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'])
-             
-rms = rmse(idealVol, tnv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tnv_cpu[3,:,:], cmap="gray")
-plt.title('{}'.format('CPU results'))
diff --git a/Wrappers/Python/demos/demo_cpu_regularisers3D.py b/Wrappers/Python/demos/demo_cpu_regularisers3D.py
deleted file mode 100644
index 2d2fc22..0000000
--- a/Wrappers/Python/demos/demo_cpu_regularisers3D.py
+++ /dev/null
@@ -1,458 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Feb 22 11:39:43 2018
-
-Demonstration of 3D CPU regularisers 
-
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
-from qualitymetrics import rmse
-###############################################################################
-def printParametersToString(pars):
-        txt = r''
-        for key, value in pars.items():
-            if key== 'algorithm' :
-                txt += "{0} = {1}".format(key, value.__name__)
-            elif key == 'input':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            elif key == 'refdata':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            else:
-                txt += "{0} = {1}".format(key, value)
-            txt += '\n'
-        return txt
-###############################################################################
-#%%
-filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-
-# read image
-Im = plt.imread(filename)
-Im = np.asarray(Im, dtype='float32')
-
-Im = Im/255
-perc = 0.05
-u0 = Im + np.random.normal(loc = 0 ,
-                                  scale = perc * Im , 
-                                  size = np.shape(Im))
-u_ref = Im + np.random.normal(loc = 0 ,
-                                  scale = 0.01 * Im , 
-                                  size = np.shape(Im))
-(N,M) = np.shape(u0)
-# map the u0 u0->u0>0
-# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-u0 = u0.astype('float32')
-u_ref = u_ref.astype('float32')
-
-# change dims to check that modules work with non-squared images
-"""
-M = M-100
-u_ref2 = np.zeros([N,M],dtype='float32')
-u_ref2[:,0:M] = u_ref[:,0:M]
-u_ref = u_ref2
-del u_ref2
-
-u02 = np.zeros([N,M],dtype='float32')
-u02[:,0:M] = u0[:,0:M]
-u0 = u02
-del u02
-
-Im2 = np.zeros([N,M],dtype='float32')
-Im2[:,0:M] = Im[:,0:M]
-Im = Im2
-del Im2
-"""
-slices = 15
-
-noisyVol = np.zeros((slices,N,M),dtype='float32')
-noisyRef = np.zeros((slices,N,M),dtype='float32')
-idealVol = np.zeros((slices,N,M),dtype='float32')
-
-for i in range (slices):
-    noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
-    noisyRef[i,:,:] = Im + np.random.normal(loc = 0 , scale = 0.01 * Im , size = np.shape(Im))
-    idealVol[i,:,:] = Im
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________ROF-TV (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of ROF-TV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy 15th slice of a volume')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm': ROF_TV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04,\
-        'number_of_iterations': 500,\
-        'time_marching_parameter': 0.0025
-        }
-print ("#############ROF TV CPU####################")
-start_time = timeit.default_timer()
-rof_cpu3D = ROF_TV(pars['input'],
-             pars['regularisation_parameter'],
-             pars['number_of_iterations'],
-             pars['time_marching_parameter'],'cpu')
-rms = rmse(idealVol, rof_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(rof_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using ROF-TV'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________FGP-TV (3D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of FGP-TV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_TV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :300 ,\
-        'tolerance_constant':0.00001,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############FGP TV CPU####################")
-start_time = timeit.default_timer()
-fgp_cpu3D = FGP_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'cpu')  
-             
-             
-rms = rmse(idealVol, fgp_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using FGP-TV'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________SB-TV (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of SB-TV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : SB_TV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :150 ,\
-        'tolerance_constant':0.00001,\
-        'methodTV': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############SB TV CPU####################")
-start_time = timeit.default_timer()
-sb_cpu3D = SB_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['printingOut'],'cpu')
-             
-rms = rmse(idealVol, sb_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(sb_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using SB-TV'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________LLT-ROF (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of LLT-ROF regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : LLT_ROF, \
-        'input' : noisyVol,\
-        'regularisation_parameterROF':0.04, \
-        'regularisation_parameterLLT':0.015, \
-        'number_of_iterations' :300 ,\
-        'time_marching_parameter' :0.0025 ,\
-        }
-
-print ("#############LLT ROF CPU####################")
-start_time = timeit.default_timer()
-lltrof_cpu3D = LLT_ROF(pars['input'], 
-              pars['regularisation_parameterROF'],
-              pars['regularisation_parameterLLT'],
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'cpu')
-
-rms = rmse(idealVol, lltrof_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(lltrof_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using LLT-ROF'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________TGV (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of TGV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : TGV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04, \
-        'alpha1':1.0,\
-        'alpha0':2.0,\
-        'number_of_iterations' :250 ,\
-        'LipshitzConstant' :12 ,\
-        }
-
-print ("#############TGV CPU####################")
-start_time = timeit.default_timer()
-tgv_cpu3D = TGV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['alpha1'],
-              pars['alpha0'],
-              pars['number_of_iterations'],
-              pars['LipshitzConstant'],'cpu')
-             
-
-rms = rmse(idealVol, tgv_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tgv_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using TGV'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("________________NDF (3D)___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of NDF regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy volume')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.025, \
-        'edge_parameter':0.015,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.025,\
-        'penalty_type':  1
-        }
-        
-print ("#############NDF CPU################")
-start_time = timeit.default_timer()
-ndf_cpu3D = NDF(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'])  
-             
-rms = rmse(idealVol, ndf_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using NDF iterations'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Anisotropic Diffusion 4th Order (2D)____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of Diff4th regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy volume')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : Diff4th, \
-        'input' : noisyVol,\
-        'regularisation_parameter':3.5, \
-        'edge_parameter':0.02,\
-        'number_of_iterations' :300 ,\
-        'time_marching_parameter':0.0015
-        }
-        
-print ("#############Diff4th CPU################")
-start_time = timeit.default_timer()
-diff4th_cpu3D = Diff4th(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'])  
-             
-rms = rmse(idealVol, diff4th_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(diff4th_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using DIFF4th iterations'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________FGP-dTV (3D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of FGP-dTV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_dTV,\
-        'input' : noisyVol,\
-        'refdata' : noisyRef,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :300 ,\
-        'tolerance_constant':0.00001,\
-        'eta_const':0.2,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############FGP dTV CPU####################")
-start_time = timeit.default_timer()
-fgp_dTV_cpu3D = FGP_dTV(pars['input'],
-              pars['refdata'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['eta_const'],
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'cpu')
-             
-             
-rms = rmse(idealVol, fgp_dTV_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_dTV_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using FGP-dTV'))
-#%%
diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
deleted file mode 100644
index 230a761..0000000
--- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
+++ /dev/null
@@ -1,790 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Feb 22 11:39:43 2018
-
-Demonstration of CPU implementation against the GPU one
-
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
-from ccpi.filters.regularisers import PatchSelect
-from qualitymetrics import rmse
-###############################################################################
-def printParametersToString(pars):
-        txt = r''
-        for key, value in pars.items():
-            if key== 'algorithm' :
-                txt += "{0} = {1}".format(key, value.__name__)
-            elif key == 'input':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            elif key == 'refdata':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            else:
-                txt += "{0} = {1}".format(key, value)
-            txt += '\n'
-        return txt
-###############################################################################
-
-filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-
-# read image
-Im = plt.imread(filename)                     
-Im = np.asarray(Im, dtype='float32')
-
-Im = Im/255
-perc = 0.05
-u0 = Im + np.random.normal(loc = 0 ,
-                                  scale = perc * Im , 
-                                  size = np.shape(Im))
-u_ref = Im + np.random.normal(loc = 0 ,
-                                  scale = 0.01 * Im , 
-                                  size = np.shape(Im))
-
-# map the u0 u0->u0>0
-# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-u0 = u0.astype('float32')
-u_ref = u_ref.astype('float32')
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________ROF-TV bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of ROF-TV regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm': ROF_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04,\
-        'number_of_iterations': 4500,\
-        'time_marching_parameter': 0.00002
-        }
-print ("#############ROF TV CPU####################")
-start_time = timeit.default_timer()
-rof_cpu = ROF_TV(pars['input'],
-             pars['regularisation_parameter'],
-             pars['number_of_iterations'],
-             pars['time_marching_parameter'],'cpu')
-rms = rmse(Im, rof_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(rof_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-print ("##############ROF TV GPU##################")
-start_time = timeit.default_timer()
-rof_gpu = ROF_TV(pars['input'], 
-                     pars['regularisation_parameter'],
-                     pars['number_of_iterations'], 
-                     pars['time_marching_parameter'],'gpu')
-                     
-rms = rmse(Im, rof_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = ROF_TV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(rof_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(rof_cpu))
-diff_im = abs(rof_cpu - rof_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________FGP-TV bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of FGP-TV regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :1200 ,\
-        'tolerance_constant':0.00001,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############FGP TV CPU####################")
-start_time = timeit.default_timer()
-fgp_cpu = FGP_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'cpu')  
-             
-             
-rms = rmse(Im, fgp_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-
-print ("##############FGP TV GPU##################")
-start_time = timeit.default_timer()
-fgp_gpu = FGP_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'gpu')
-                                   
-rms = rmse(Im, fgp_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = FGP_TV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(fgp_cpu))
-diff_im = abs(fgp_cpu - fgp_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________SB-TV bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of SB-TV regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : SB_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :150 ,\
-        'tolerance_constant':1e-05,\
-        'methodTV': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############SB-TV CPU####################")
-start_time = timeit.default_timer()
-sb_cpu = SB_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['printingOut'],'cpu')  
-             
-             
-rms = rmse(Im, sb_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(sb_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-
-print ("##############SB TV GPU##################")
-start_time = timeit.default_timer()
-sb_gpu = SB_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['printingOut'],'gpu')
-                                   
-rms = rmse(Im, sb_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = SB_TV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(sb_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(sb_cpu))
-diff_im = abs(sb_cpu - sb_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________TGV bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of TGV regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : TGV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'alpha1':1.0,\
-        'alpha0':2.0,\
-        'number_of_iterations' :400 ,\
-        'LipshitzConstant' :12 ,\
-        }
-        
-print ("#############TGV CPU####################")
-start_time = timeit.default_timer()
-tgv_cpu = TGV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['alpha1'],
-              pars['alpha0'],
-              pars['number_of_iterations'],
-              pars['LipshitzConstant'],'cpu')
-             
-rms = rmse(Im, tgv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tgv_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-print ("##############TGV GPU##################")
-start_time = timeit.default_timer()
-tgv_gpu = TGV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['alpha1'],
-              pars['alpha0'],
-              pars['number_of_iterations'],
-              pars['LipshitzConstant'],'gpu')
-                                   
-rms = rmse(Im, tgv_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = TGV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tgv_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(tgv_gpu))
-diff_im = abs(tgv_cpu - tgv_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________LLT-ROF bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of LLT-ROF regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : LLT_ROF, \
-        'input' : u0,\
-        'regularisation_parameterROF':0.04, \
-        'regularisation_parameterLLT':0.01, \
-        'number_of_iterations' :4500 ,\
-        'time_marching_parameter' :0.00002 ,\
-        }
-        
-print ("#############LLT- ROF CPU####################")
-start_time = timeit.default_timer()
-lltrof_cpu = LLT_ROF(pars['input'], 
-              pars['regularisation_parameterROF'],
-              pars['regularisation_parameterLLT'],
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'cpu')
-
-rms = rmse(Im, lltrof_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(lltrof_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-print ("#############LLT- ROF GPU####################")
-start_time = timeit.default_timer()
-lltrof_gpu = LLT_ROF(pars['input'], 
-              pars['regularisation_parameterROF'],
-              pars['regularisation_parameterLLT'],
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'gpu')
-
-rms = rmse(Im, lltrof_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = LLT_ROF
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(lltrof_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(lltrof_gpu))
-diff_im = abs(lltrof_cpu - lltrof_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________NDF bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of NDF regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF, \
-        'input' : u0,\
-        'regularisation_parameter':0.06, \
-        'edge_parameter':0.04,\
-        'number_of_iterations' :1000 ,\
-        'time_marching_parameter':0.025,\
-        'penalty_type':  1
-        }
-        
-print ("#############NDF CPU####################")
-start_time = timeit.default_timer()
-ndf_cpu = NDF(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'],'cpu')
-             
-rms = rmse(Im, ndf_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-
-print ("##############NDF GPU##################")
-start_time = timeit.default_timer()
-ndf_gpu = NDF(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'],'gpu')
-             
-rms = rmse(Im, ndf_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = NDF
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(ndf_cpu))
-diff_im = abs(ndf_cpu - ndf_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Anisotropic Diffusion 4th Order (2D)____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of Diff4th regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : Diff4th, \
-        'input' : u0,\
-        'regularisation_parameter':3.5, \
-        'edge_parameter':0.02,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.001
-        }
-
-print ("#############Diff4th CPU####################")
-start_time = timeit.default_timer()
-diff4th_cpu = Diff4th(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'cpu')
-             
-rms = rmse(Im, diff4th_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(diff4th_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-print ("##############Diff4th GPU##################")
-start_time = timeit.default_timer()
-diff4th_gpu = Diff4th(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 'gpu')
-             
-rms = rmse(Im, diff4th_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = Diff4th
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(diff4th_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(diff4th_cpu))
-diff_im = abs(diff4th_cpu - diff4th_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________FGP-dTV bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of FGP-dTV regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_dTV, \
-        'input' : u0,\
-        'refdata' : u_ref,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :1000 ,\
-        'tolerance_constant':1e-07,\
-        'eta_const':0.2,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############FGP dTV CPU####################")
-start_time = timeit.default_timer()
-fgp_dtv_cpu = FGP_dTV(pars['input'], 
-              pars['refdata'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['eta_const'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'cpu')
-             
-             
-rms = rmse(Im, fgp_dtv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_dtv_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-print ("##############FGP dTV GPU##################")
-start_time = timeit.default_timer()
-fgp_dtv_gpu = FGP_dTV(pars['input'], 
-              pars['refdata'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['eta_const'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'gpu')
-rms = rmse(Im, fgp_dtv_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = FGP_dTV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_dtv_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(fgp_dtv_cpu))
-diff_im = abs(fgp_dtv_cpu - fgp_dtv_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____Non-local regularisation bench_________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of Nonlocal TV regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-pars = {'algorithm' : PatchSelect, \
-        'input' : u0,\
-        'searchwindow': 7, \
-        'patchwindow': 2,\
-        'neighbours' : 15 ,\
-        'edge_parameter':0.18}
-
-print ("############## Nonlocal Patches on CPU##################")
-start_time = timeit.default_timer()
-H_i, H_j, WeightsCPU = PatchSelect(pars['input'], 
-              pars['searchwindow'],
-              pars['patchwindow'], 
-              pars['neighbours'],
-              pars['edge_parameter'],'cpu')
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-
-print ("############## Nonlocal Patches on GPU##################")
-start_time = timeit.default_timer()
-start_time = timeit.default_timer()
-H_i, H_j, WeightsGPU = PatchSelect(pars['input'], 
-              pars['searchwindow'],
-              pars['patchwindow'], 
-              pars['neighbours'],
-              pars['edge_parameter'],'gpu')
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(u0))
-diff_im = abs(WeightsCPU[0,:,:] - WeightsGPU[0,:,:])
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,2,2)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-#%%
\ No newline at end of file
diff --git a/Wrappers/Python/demos/demo_gpu_regularisers.py b/Wrappers/Python/demos/demo_gpu_regularisers.py
deleted file mode 100644
index e1c6575..0000000
--- a/Wrappers/Python/demos/demo_gpu_regularisers.py
+++ /dev/null
@@ -1,518 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Feb 22 11:39:43 2018
-
-Demonstration of GPU regularisers
-
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
-from ccpi.filters.regularisers import PatchSelect, NLTV
-from qualitymetrics import rmse
-###############################################################################
-def printParametersToString(pars):
-        txt = r''
-        for key, value in pars.items():
-            if key== 'algorithm' :
-                txt += "{0} = {1}".format(key, value.__name__)
-            elif key == 'input':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            elif key == 'refdata':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            else:
-                txt += "{0} = {1}".format(key, value)
-            txt += '\n'
-        return txt
-###############################################################################
-#%%
-filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-
-# read image
-Im = plt.imread(filename)                     
-Im = np.asarray(Im, dtype='float32')
-
-Im = Im/255
-perc = 0.05
-u0 = Im + np.random.normal(loc = 0 ,
-                                  scale = perc * Im , 
-                                  size = np.shape(Im))
-u_ref = Im + np.random.normal(loc = 0 ,
-                                  scale = 0.01 * Im , 
-                                  size = np.shape(Im))
-(N,M) = np.shape(u0)
-# map the u0 u0->u0>0
-# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-u0 = u0.astype('float32')
-u_ref = u_ref.astype('float32')
-"""
-M = M-100
-u_ref2 = np.zeros([N,M],dtype='float32')
-u_ref2[:,0:M] = u_ref[:,0:M]
-u_ref = u_ref2
-del u_ref2
-
-u02 = np.zeros([N,M],dtype='float32')
-u02[:,0:M] = u0[:,0:M]
-u0 = u02
-del u02
-
-Im2 = np.zeros([N,M],dtype='float32')
-Im2[:,0:M] = Im[:,0:M]
-Im = Im2
-del Im2
-"""
-#%%
-
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________ROF-TV regulariser_____________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of the ROF-TV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm': ROF_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04,\
-        'number_of_iterations': 1200,\
-        'time_marching_parameter': 0.0025
-        }
-print ("##############ROF TV GPU##################")
-start_time = timeit.default_timer()
-rof_gpu = ROF_TV(pars['input'], 
-                     pars['regularisation_parameter'],
-                     pars['number_of_iterations'], 
-                     pars['time_marching_parameter'],'gpu')
-                     
-rms = rmse(Im, rof_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = ROF_TV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(rof_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________FGP-TV regulariser_____________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of the FGP-TV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :1200 ,\
-        'tolerance_constant':1e-06,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-
-print ("##############FGP TV GPU##################")
-start_time = timeit.default_timer()
-fgp_gpu = FGP_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'gpu')
-                                   
-rms = rmse(Im, fgp_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = FGP_TV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________SB-TV regulariser______________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of the SB-TV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : SB_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :150 ,\
-        'tolerance_constant':1e-06,\
-        'methodTV': 0 ,\
-        'printingOut': 0 
-        }
-
-print ("##############SB TV GPU##################")
-start_time = timeit.default_timer()
-sb_gpu = SB_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['printingOut'],'gpu')
-                                   
-rms = rmse(Im, sb_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = SB_TV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(sb_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-#%%
-
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_____Total Generalised Variation (2D)______")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of TGV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : TGV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'alpha1':1.0,\
-        'alpha0':2.0,\
-        'number_of_iterations' :1250 ,\
-        'LipshitzConstant' :12 ,\
-        }
-        
-print ("#############TGV CPU####################")
-start_time = timeit.default_timer()
-tgv_gpu = TGV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['alpha1'],
-              pars['alpha0'],
-              pars['number_of_iterations'],
-              pars['LipshitzConstant'],'gpu')  
-             
-             
-rms = rmse(Im, tgv_gpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tgv_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("______________LLT- ROF (2D)________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of LLT-ROF regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : LLT_ROF, \
-        'input' : u0,\
-        'regularisation_parameterROF':0.04, \
-        'regularisation_parameterLLT':0.01, \
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter' :0.0025 ,\
-        }
-        
-print ("#############LLT- ROF GPU####################")
-start_time = timeit.default_timer()
-lltrof_gpu = LLT_ROF(pars['input'], 
-              pars['regularisation_parameterROF'],
-              pars['regularisation_parameterLLT'],
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'gpu')
-             
-             
-rms = rmse(Im, lltrof_gpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(lltrof_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________NDF regulariser_____________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of the NDF regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF, \
-        'input' : u0,\
-        'regularisation_parameter':0.025, \
-        'edge_parameter':0.015,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.025,\
-        'penalty_type':  1
-        }
-
-print ("##############NDF GPU##################")
-start_time = timeit.default_timer()
-ndf_gpu = NDF(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'],'gpu')  
-             
-rms = rmse(Im, ndf_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = NDF
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Anisotropic Diffusion 4th Order (2D)____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of Diff4th regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : Diff4th, \
-        'input' : u0,\
-        'regularisation_parameter':3.5, \
-        'edge_parameter':0.02,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.0015
-        }
-        
-print ("#############DIFF4th CPU################")
-start_time = timeit.default_timer()
-diff4_gpu = Diff4th(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'gpu')
-             
-rms = rmse(Im, diff4_gpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(diff4_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Nonlocal patches pre-calculation____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-start_time = timeit.default_timer()
-# set parameters
-pars = {'algorithm' : PatchSelect, \
-        'input' : u0,\
-        'searchwindow': 7, \
-        'patchwindow': 2,\
-        'neighbours' : 15 ,\
-        'edge_parameter':0.18}
-
-H_i, H_j, Weights = PatchSelect(pars['input'], 
-              pars['searchwindow'],
-              pars['patchwindow'], 
-              pars['neighbours'],
-              pars['edge_parameter'],'gpu')
-              
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-"""
-plt.figure()
-plt.imshow(Weights[0,:,:],cmap="gray",interpolation="nearest",vmin=0, vmax=1)
-plt.show()
-"""
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Nonlocal Total Variation penalty____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of NLTV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-pars2 = {'algorithm' : NLTV, \
-        'input' : u0,\
-        'H_i': H_i, \
-        'H_j': H_j,\
-        'H_k' : 0,\
-        'Weights' : Weights,\
-        'regularisation_parameter': 0.02,\
-        'iterations': 3
-        }
-start_time = timeit.default_timer()
-nltv_cpu = NLTV(pars2['input'], 
-              pars2['H_i'],
-              pars2['H_j'], 
-              pars2['H_k'],
-              pars2['Weights'],
-              pars2['regularisation_parameter'],
-              pars2['iterations'])
-
-rms = rmse(Im, nltv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(nltv_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________FGP-dTV bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of the FGP-dTV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_dTV, \
-        'input' : u0,\
-        'refdata' : u_ref,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :2000 ,\
-        'tolerance_constant':1e-06,\
-        'eta_const':0.2,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-
-print ("##############FGP dTV GPU##################")
-start_time = timeit.default_timer()
-fgp_dtv_gpu = FGP_dTV(pars['input'], 
-              pars['refdata'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['eta_const'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'gpu')
-                                   
-rms = rmse(Im, fgp_dtv_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = FGP_dTV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_dtv_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
diff --git a/Wrappers/Python/demos/demo_gpu_regularisers3D.py b/Wrappers/Python/demos/demo_gpu_regularisers3D.py
deleted file mode 100644
index b6058d2..0000000
--- a/Wrappers/Python/demos/demo_gpu_regularisers3D.py
+++ /dev/null
@@ -1,460 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Feb 22 11:39:43 2018
-
-Demonstration of GPU regularisers
-
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
-from qualitymetrics import rmse
-###############################################################################
-def printParametersToString(pars):
-        txt = r''
-        for key, value in pars.items():
-            if key== 'algorithm' :
-                txt += "{0} = {1}".format(key, value.__name__)
-            elif key == 'input':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            elif key == 'refdata':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            else:
-                txt += "{0} = {1}".format(key, value)
-            txt += '\n'
-        return txt
-###############################################################################
-#%%
-filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-
-# read image
-Im = plt.imread(filename)                     
-Im = np.asarray(Im, dtype='float32')
-
-Im = Im/255
-perc = 0.05
-u0 = Im + np.random.normal(loc = 0 ,
-                                  scale = perc * Im , 
-                                  size = np.shape(Im))
-u_ref = Im + np.random.normal(loc = 0 ,
-                                  scale = 0.01 * Im , 
-                                  size = np.shape(Im))
-(N,M) = np.shape(u0)
-# map the u0 u0->u0>0
-# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-u0 = u0.astype('float32')
-u_ref = u_ref.astype('float32')
-"""
-M = M-100
-u_ref2 = np.zeros([N,M],dtype='float32')
-u_ref2[:,0:M] = u_ref[:,0:M]
-u_ref = u_ref2
-del u_ref2
-
-u02 = np.zeros([N,M],dtype='float32')
-u02[:,0:M] = u0[:,0:M]
-u0 = u02
-del u02
-
-Im2 = np.zeros([N,M],dtype='float32')
-Im2[:,0:M] = Im[:,0:M]
-Im = Im2
-del Im2
-"""
-
-
-slices = 20
-
-filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-Im = plt.imread(filename)
-Im = np.asarray(Im, dtype='float32')
-
-Im = Im/255
-perc = 0.05
-
-noisyVol = np.zeros((slices,N,N),dtype='float32')
-noisyRef = np.zeros((slices,N,N),dtype='float32')
-idealVol = np.zeros((slices,N,N),dtype='float32')
-
-for i in range (slices):
-    noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
-    noisyRef[i,:,:] = Im + np.random.normal(loc = 0 , scale = 0.01 * Im , size = np.shape(Im))
-    idealVol[i,:,:] = Im
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________ROF-TV (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of ROF-TV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy 15th slice of a volume')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm': ROF_TV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04,\
-        'number_of_iterations': 500,\
-        'time_marching_parameter': 0.0025        
-        }
-print ("#############ROF TV GPU####################")
-start_time = timeit.default_timer()
-rof_gpu3D = ROF_TV(pars['input'],
-             pars['regularisation_parameter'],
-             pars['number_of_iterations'],
-             pars['time_marching_parameter'],'gpu')
-rms = rmse(idealVol, rof_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(rof_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using ROF-TV'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________FGP-TV (3D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of FGP-TV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_TV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :300 ,\
-        'tolerance_constant':0.00001,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-
-print ("#############FGP TV GPU####################")
-start_time = timeit.default_timer()
-fgp_gpu3D = FGP_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'gpu')
-
-rms = rmse(idealVol, fgp_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using FGP-TV'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________SB-TV (3D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of SB-TV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : SB_TV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :100 ,\
-        'tolerance_constant':1e-05,\
-        'methodTV': 0 ,\
-        'printingOut': 0 
-        }
-
-print ("#############SB TV GPU####################")
-start_time = timeit.default_timer()
-sb_gpu3D = SB_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['printingOut'],'gpu')
-
-rms = rmse(idealVol, sb_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(sb_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using SB-TV'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________LLT-ROF (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of LLT-ROF regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : LLT_ROF, \
-        'input' : noisyVol,\
-        'regularisation_parameterROF':0.04, \
-        'regularisation_parameterLLT':0.015, \
-        'number_of_iterations' :300 ,\
-        'time_marching_parameter' :0.0025 ,\
-        }
-
-print ("#############LLT ROF CPU####################")
-start_time = timeit.default_timer()
-lltrof_gpu3D = LLT_ROF(pars['input'], 
-              pars['regularisation_parameterROF'],
-              pars['regularisation_parameterLLT'],
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'gpu')
-
-rms = rmse(idealVol, lltrof_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(lltrof_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using LLT-ROF'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________TGV (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of TGV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : TGV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04, \
-        'alpha1':1.0,\
-        'alpha0':2.0,\
-        'number_of_iterations' :600 ,\
-        'LipshitzConstant' :12 ,\
-        }
-
-print ("#############TGV GPU####################")
-start_time = timeit.default_timer()
-tgv_gpu3D = TGV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['alpha1'],
-              pars['alpha0'],
-              pars['number_of_iterations'],
-              pars['LipshitzConstant'],'gpu')
-             
-
-rms = rmse(idealVol, tgv_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tgv_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using TGV'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________NDF-TV (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of NDF regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.025, \
-        'edge_parameter':0.015,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.025,\
-        'penalty_type':  1
-        }
-
-print ("#############NDF GPU####################")
-start_time = timeit.default_timer()
-ndf_gpu3D = NDF(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'],'gpu')
-
-rms = rmse(idealVol, ndf_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using NDF'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Anisotropic Diffusion 4th Order (3D)____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of DIFF4th regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : Diff4th, \
-        'input' : noisyVol,\
-        'regularisation_parameter':3.5, \
-        'edge_parameter':0.02,\
-        'number_of_iterations' :300 ,\
-        'time_marching_parameter':0.0015
-        }
-        
-print ("#############DIFF4th CPU################")
-start_time = timeit.default_timer()
-diff4_gpu3D = Diff4th(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'gpu')
-             
-rms = rmse(idealVol, diff4_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(diff4_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________FGP-dTV (3D)________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of FGP-dTV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_dTV, \
-        'input' : noisyVol,\
-        'refdata' : noisyRef,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :300 ,\
-        'tolerance_constant':0.00001,\
-        'eta_const':0.2,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-
-print ("#############FGP TV GPU####################")
-start_time = timeit.default_timer()
-fgp_dTV_gpu3D = FGP_dTV(pars['input'],
-              pars['refdata'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['eta_const'],
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'gpu')
-
-rms = rmse(idealVol, fgp_dTV_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_dTV_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using FGP-dTV'))
-#%%
diff --git a/Wrappers/Python/demos/qualitymetrics.py b/Wrappers/Python/demos/qualitymetrics.py
deleted file mode 100644
index 850829e..0000000
--- a/Wrappers/Python/demos/qualitymetrics.py
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Feb 21 13:34:32 2018
-# quality metrics
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-import numpy as np
-
-def nrmse(im1, im2):
-    rmse = np.sqrt(np.sum((im2 - im1) ** 2) / float(im1.size))
-    max_val = max(np.max(im1), np.max(im2))
-    min_val = min(np.min(im1), np.min(im2))
-    return 1 - (rmse / (max_val - min_val))
-    
-def rmse(im1, im2):
-    rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size))
-    return rmse
diff --git a/Wrappers/Python/setup-regularisers.py.in b/Wrappers/Python/setup-regularisers.py.in
deleted file mode 100644
index 462edda..0000000
--- a/Wrappers/Python/setup-regularisers.py.in
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env python
-
-import setuptools
-from distutils.core import setup
-from distutils.extension import Extension
-from Cython.Distutils import build_ext
-
-import os
-import sys
-import numpy
-import platform	
-
-cil_version=os.environ['CIL_VERSION']
-if  cil_version == '':
-    print("Please set the environmental variable CIL_VERSION")
-    sys.exit(1)
-	
-library_include_path = ""
-library_lib_path = ""
-try:
-    library_include_path = os.environ['LIBRARY_INC']
-    library_lib_path = os.environ['LIBRARY_LIB']
-except:
-    library_include_path = os.environ['PREFIX']+'/include'
-    pass
-    
-extra_include_dirs = [numpy.get_include(), library_include_path]
-#extra_library_dirs = [os.path.join(library_include_path, "..", "lib")]
-extra_compile_args = []
-extra_library_dirs = [library_lib_path]
-extra_compile_args = []
-extra_link_args = []
-extra_libraries = ['cilreg']
-
-print ("extra_library_dirs " , extra_library_dirs)
-
-extra_include_dirs += [os.path.join(".." , ".." , "Core"),
-                       os.path.join(".." , ".." , "Core",  "regularisers_CPU"),
-                       os.path.join(".." , ".." , "Core",  "inpainters_CPU"),
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_FGP" ) , 
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_ROF" ) , 
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_SB" ) ,
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TGV" ) ,
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "LLTROF" ) ,
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "NDF" ) ,
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "dTV_FGP" ) , 
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "DIFF4th" ) , 
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "PatchSelect" ) ,
-						   "."]
-
-if platform.system() == 'Windows':				   
-    extra_compile_args[0:] = ['/DWIN32','/EHsc','/DBOOST_ALL_NO_LIB' , '/openmp' ]   
-else:
-    extra_compile_args = ['-fopenmp','-O2', '-funsigned-char', '-Wall', '-std=c++0x']
-    extra_libraries += [@EXTRA_OMP_LIB@]
-    
-setup(
-    name='ccpi',
-	description='CCPi Core Imaging Library - Image regularisers',
-	version=cil_version,
-    cmdclass = {'build_ext': build_ext},
-    ext_modules = [Extension("ccpi.filters.cpu_regularisers",
-                             sources=[os.path.join("." , "src", "cpu_regularisers.pyx" ) ],
-                             include_dirs=extra_include_dirs, 
-							 library_dirs=extra_library_dirs, 
-							 extra_compile_args=extra_compile_args, 
-							 libraries=extra_libraries ), 
-    
-    ],
-	zip_safe = False,	
-	packages = {'ccpi','ccpi.filters'},
-)
-
-
-@SETUP_GPU_WRAPPERS@
diff --git a/Wrappers/Python/src/cpu_regularisers.pyx b/Wrappers/Python/src/cpu_regularisers.pyx
deleted file mode 100644
index 11a0617..0000000
--- a/Wrappers/Python/src/cpu_regularisers.pyx
+++ /dev/null
@@ -1,685 +0,0 @@
-# distutils: language=c++
-"""
-Copyright 2018 CCPi
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
-Author: Edoardo Pasca, Daniil Kazantsev
-"""
-
-import cython
-import numpy as np
-cimport numpy as np
-
-cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
-cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
-cdef extern float SB_TV_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
-cdef extern float LLT_ROF_CPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
-cdef extern float TGV_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
-cdef extern float Diffusion_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ);
-cdef extern float Diffus4th_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
-cdef extern float TNV_CPU_main(float *Input, float *u, float lambdaPar, int maxIter, float tol, int dimX, int dimY, int dimZ);
-cdef extern float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
-cdef extern float PatchSelect_CPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM);
-cdef extern float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb);
-
-cdef extern float Diffusion_Inpaint_CPU_main(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ);
-cdef extern float NonlocalMarching_Inpaint_main(float *Input, unsigned char *M, float *Output, unsigned char *M_upd, int SW_increment, int iterationsNumb, int trigger, int dimX, int dimY, int dimZ);
-cdef extern float TV_energy2D(float *U, float *U0, float *E_val, float lambdaPar, int type, int dimX, int dimY);
-cdef extern float TV_energy3D(float *U, float *U0, float *E_val, float lambdaPar, int type, int dimX, int dimY, int dimZ);
-#****************************************************************#
-#********************** Total-variation ROF *********************#
-#****************************************************************#
-def TV_ROF_CPU(inputData, regularisation_parameter, iterationsNumb, marching_step_parameter):
-    if inputData.ndim == 2:
-        return TV_ROF_2D(inputData, regularisation_parameter, iterationsNumb, marching_step_parameter)
-    elif inputData.ndim == 3:
-        return TV_ROF_3D(inputData, regularisation_parameter, iterationsNumb, marching_step_parameter)
-
-def TV_ROF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     int iterationsNumb,                     
-                     float marching_step_parameter):
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')
-                   
-    # Run ROF iterations for 2D data 
-    TV_ROF_CPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, iterationsNumb, marching_step_parameter, dims[1], dims[0], 1)
-    
-    return outputData
-            
-def TV_ROF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     int iterationsNumb,
-                     float marching_step_parameter):
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
-           
-    # Run ROF iterations for 3D data 
-    TV_ROF_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, iterationsNumb, marching_step_parameter, dims[2], dims[1], dims[0])
-
-    return outputData
-
-#****************************************************************#
-#********************** Total-variation FGP *********************#
-#****************************************************************#
-#******** Total-variation Fast-Gradient-Projection (FGP)*********#
-def TV_FGP_CPU(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM):
-    if inputData.ndim == 2:
-        return TV_FGP_2D(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM)
-    elif inputData.ndim == 3:
-        return TV_FGP_3D(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM)
-
-def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     int iterationsNumb, 
-                     float tolerance_param,
-                     int methodTV,
-                     int nonneg,
-                     int printM):
-                         
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')
-                   
-    #/* Run FGP-TV iterations for 2D data */
-    TV_FGP_CPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, 
-                       iterationsNumb, 
-                       tolerance_param,
-                       methodTV,
-                       nonneg,
-                       printM,
-                       dims[1],dims[0],1)
-    
-    return outputData        
-            
-def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     int iterationsNumb, 
-                     float tolerance_param,
-                     int methodTV,
-                     int nonneg,
-                     int printM):
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-            np.zeros([dims[0], dims[1], dims[2]], dtype='float32')
-           
-    #/* Run FGP-TV iterations for 3D data */
-    TV_FGP_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter,
-                       iterationsNumb, 
-                       tolerance_param,
-                       methodTV,
-                       nonneg,
-                       printM,
-                       dims[2], dims[1], dims[0])
-    return outputData 
-
-#***************************************************************#
-#********************** Total-variation SB *********************#
-#***************************************************************#
-#*************** Total-variation Split Bregman (SB)*************#
-def TV_SB_CPU(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM):
-    if inputData.ndim == 2:
-        return TV_SB_2D(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM)
-    elif inputData.ndim == 3:
-        return TV_SB_3D(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM)
-
-def TV_SB_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     int iterationsNumb, 
-                     float tolerance_param,
-                     int methodTV,
-                     int printM):
-                         
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')
-                   
-    #/* Run SB-TV iterations for 2D data */
-    SB_TV_CPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, 
-                       iterationsNumb, 
-                       tolerance_param,
-                       methodTV,
-                       printM,
-                       dims[1],dims[0],1)
-    
-    return outputData        
-            
-def TV_SB_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     int iterationsNumb, 
-                     float tolerance_param,
-                     int methodTV,
-                     int printM):
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-            np.zeros([dims[0], dims[1], dims[2]], dtype='float32')
-           
-    #/* Run SB-TV iterations for 3D data */
-    SB_TV_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter,
-                       iterationsNumb, 
-                       tolerance_param,
-                       methodTV,
-                       printM,
-                       dims[2], dims[1], dims[0])
-    return outputData 
-
-#***************************************************************#
-#***************** Total Generalised Variation *****************#
-#***************************************************************#
-def TGV_CPU(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst):
-    if inputData.ndim == 2:
-        return TGV_2D(inputData, regularisation_parameter, alpha1, alpha0, 
-                      iterations, LipshitzConst)
-    elif inputData.ndim == 3:
-        return TGV_3D(inputData, regularisation_parameter, alpha1, alpha0, 
-                      iterations, LipshitzConst)
-
-def TGV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     float alpha1,
-                     float alpha0,
-                     int iterationsNumb, 
-                     float LipshitzConst):
-                         
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')
-                   
-    #/* Run TGV iterations for 2D data */
-    TGV_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, 
-                       alpha1,
-                       alpha0,
-                       iterationsNumb, 
-                       LipshitzConst,
-                       dims[1],dims[0],1)
-    return outputData
-def TGV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     float alpha1,
-                     float alpha0,
-                     int iterationsNumb, 
-                     float LipshitzConst):
-                         
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-            np.zeros([dims[0], dims[1], dims[2]], dtype='float32')
-                   
-    #/* Run TGV iterations for 3D data */
-    TGV_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, 
-                       alpha1,
-                       alpha0,
-                       iterationsNumb, 
-                       LipshitzConst,
-                       dims[2], dims[1], dims[0])
-    return outputData
-
-#***************************************************************#
-#******************* ROF - LLT regularisation ******************#
-#***************************************************************#
-def LLT_ROF_CPU(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter):
-    if inputData.ndim == 2:
-        return LLT_ROF_2D(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
-    elif inputData.ndim == 3:
-        return LLT_ROF_3D(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
-
-def LLT_ROF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameterROF,
-                     float regularisation_parameterLLT,
-                     int iterations, 
-                     float time_marching_parameter):
-                         
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')
-                   
-    #/* Run ROF-LLT iterations for 2D data */
-    LLT_ROF_CPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[1],dims[0],1)
-    return outputData
-
-def LLT_ROF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameterROF,
-                     float regularisation_parameterLLT,
-                     int iterations, 
-                     float time_marching_parameter):
-						 
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-            np.zeros([dims[0], dims[1], dims[2]], dtype='float32')
-           
-    #/* Run ROF-LLT iterations for 3D data */
-    LLT_ROF_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[2], dims[1], dims[0])
-    return outputData 
-
-#****************************************************************#
-#**************Directional Total-variation FGP ******************#
-#****************************************************************#
-#******** Directional TV Fast-Gradient-Projection (FGP)*********#
-def dTV_FGP_CPU(inputData, refdata, regularisation_parameter, iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM):
-    if inputData.ndim == 2:
-        return dTV_FGP_2D(inputData, refdata, regularisation_parameter, iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM)
-    elif inputData.ndim == 3:
-        return dTV_FGP_3D(inputData, refdata, regularisation_parameter, iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM)
-
-def dTV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-               np.ndarray[np.float32_t, ndim=2, mode="c"] refdata,
-                     float regularisation_parameter,
-                     int iterationsNumb, 
-                     float tolerance_param,
-                     float eta_const,
-                     int methodTV,
-                     int nonneg,
-                     int printM):
-                         
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')
-                   
-    #/* Run FGP-dTV iterations for 2D data */
-    dTV_FGP_CPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0], regularisation_parameter, 
-                       iterationsNumb, 
-                       tolerance_param,
-                       eta_const,
-                       methodTV,                       
-                       nonneg,
-                       printM,
-                       dims[1], dims[0], 1)
-    
-    return outputData        
-            
-def dTV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
-               np.ndarray[np.float32_t, ndim=3, mode="c"] refdata,
-                     float regularisation_parameter,
-                     int iterationsNumb, 
-                     float tolerance_param,
-                     float eta_const,
-                     int methodTV,
-                     int nonneg,
-                     int printM):
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-            np.zeros([dims[0], dims[1], dims[2]], dtype='float32')
-           
-    #/* Run FGP-dTV iterations for 3D data */
-    dTV_FGP_CPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0], regularisation_parameter,
-                       iterationsNumb, 
-                       tolerance_param,
-                       eta_const,
-                       methodTV,
-                       nonneg,
-                       printM,
-                       dims[2], dims[1], dims[0])
-    return outputData
-    
-#****************************************************************#
-#*********************Total Nuclear Variation********************#
-#****************************************************************#
-def TNV_CPU(inputData, regularisation_parameter, iterationsNumb, tolerance_param):
-    if inputData.ndim == 2:
-        return 
-    elif inputData.ndim == 3:
-        return TNV_3D(inputData, regularisation_parameter, iterationsNumb, tolerance_param)
-
-def TNV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     int iterationsNumb,
-                     float tolerance_param):
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
-           
-    # Run TNV iterations for 3D (X,Y,Channels) data 
-    TNV_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, iterationsNumb, tolerance_param, dims[2], dims[1], dims[0])
-    return outputData
-#****************************************************************#
-#***************Nonlinear (Isotropic) Diffusion******************#
-#****************************************************************#
-def NDF_CPU(inputData, regularisation_parameter, edge_parameter, iterationsNumb,time_marching_parameter, penalty_type):
-    if inputData.ndim == 2:
-        return NDF_2D(inputData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type)
-    elif inputData.ndim == 3:
-        return NDF_3D(inputData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type)
-
-def NDF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     float edge_parameter,
-                     int iterationsNumb,                     
-                     float time_marching_parameter,
-                     int penalty_type):
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')   
-    
-    # Run Nonlinear Diffusion iterations for 2D data 
-    Diffusion_CPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[1], dims[0], 1)
-    return outputData
-            
-def NDF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     float edge_parameter,
-                     int iterationsNumb,                     
-                     float time_marching_parameter,
-                     int penalty_type):
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
-    
-    # Run Nonlinear Diffusion iterations for  3D data 
-    Diffusion_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[2], dims[1], dims[0])
-
-    return outputData
-
-#****************************************************************#
-#*************Anisotropic Fourth-Order diffusion*****************#
-#****************************************************************#
-def Diff4th_CPU(inputData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter):
-    if inputData.ndim == 2:
-        return Diff4th_2D(inputData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter)
-    elif inputData.ndim == 3:
-        return Diff4th_3D(inputData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter)
-
-def Diff4th_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     float edge_parameter,
-                     int iterationsNumb,                     
-                     float time_marching_parameter):
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')   
-    
-    # Run Anisotropic Fourth-Order diffusion for 2D data 
-    Diffus4th_CPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[1], dims[0], 1)
-    return outputData
-          
-def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     float edge_parameter,
-                     int iterationsNumb,
-                     float time_marching_parameter):
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
-    
-    # Run Anisotropic Fourth-Order diffusion for  3D data 
-    Diffus4th_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[2], dims[1], dims[0])
-
-    return outputData
-
-#****************************************************************#
-#***************Patch-based weights calculation******************#
-#****************************************************************#
-def PATCHSEL_CPU(inputData, searchwindow, patchwindow, neighbours, edge_parameter):
-    if inputData.ndim == 2:
-        return PatchSel_2D(inputData, searchwindow, patchwindow, neighbours, edge_parameter)
-    elif inputData.ndim == 3:
-        return 1
-def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
-                     int searchwindow,
-                     int patchwindow,
-                     int neighbours,
-                     float edge_parameter):
-    cdef long dims[3]
-    dims[0] = neighbours
-    dims[1] = inputData.shape[0]
-    dims[2] = inputData.shape[1]
-    
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] Weights = \
-            np.zeros([dims[0], dims[1],dims[2]], dtype='float32')
-    
-    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_i = \
-            np.zeros([dims[0], dims[1],dims[2]], dtype='uint16')
-            
-    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_j = \
-            np.zeros([dims[0], dims[1],dims[2]], dtype='uint16')
-
-    # Run patch-based weight selection function
-    PatchSelect_CPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], 0, searchwindow, patchwindow,  neighbours,  edge_parameter, 1)
-    return H_i, H_j, Weights
-"""
-def PatchSel_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
-                     int searchwindow,
-                     int patchwindow,
-                     int neighbours,
-                     float edge_parameter):
-    cdef long dims[4]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    dims[3] = neighbours
-    
-    cdef np.ndarray[np.float32_t, ndim=4, mode="c"] Weights = \
-            np.zeros([dims[3],dims[0],dims[1],dims[2]], dtype='float32')
-    
-    cdef np.ndarray[np.uint16_t, ndim=4, mode="c"] H_i = \
-            np.zeros([dims[3],dims[0],dims[1],dims[2]], dtype='uint16')
-            
-    cdef np.ndarray[np.uint16_t, ndim=4, mode="c"] H_j = \
-            np.zeros([dims[3],dims[0],dims[1],dims[2]], dtype='uint16')
-            
-    cdef np.ndarray[np.uint16_t, ndim=4, mode="c"] H_k = \
-            np.zeros([dims[3],dims[0],dims[1],dims[2]], dtype='uint16')
-
-    # Run patch-based weight selection function
-    PatchSelect_CPU_main(&inputData[0,0,0], &H_i[0,0,0,0], &H_j[0,0,0,0], &H_k[0,0,0,0], &Weights[0,0,0,0], dims[2], dims[1], dims[0], searchwindow, patchwindow,  neighbours, edge_parameter, 1)
-    return H_i, H_j, H_k, Weights
-"""
-
-#****************************************************************#
-#***************Non-local Total Variation******************#
-#****************************************************************#
-def NLTV_CPU(inputData, H_i, H_j, H_k, Weights, regularisation_parameter, iterations):
-    if inputData.ndim == 2:
-        return NLTV_2D(inputData, H_i, H_j, Weights, regularisation_parameter, iterations)
-    elif inputData.ndim == 3:
-        return 1
-def NLTV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
-                     np.ndarray[np.uint16_t, ndim=3, mode="c"] H_i,
-                     np.ndarray[np.uint16_t, ndim=3, mode="c"] H_j,
-                     np.ndarray[np.float32_t, ndim=3, mode="c"] Weights,
-                     float regularisation_parameter,
-                     int iterations):
-
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    neighbours = H_i.shape[0]
-    
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')
-    
-    # Run nonlocal TV regularisation
-    Nonlocal_TV_CPU_main(&inputData[0,0], &outputData[0,0], &H_i[0,0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[1], dims[0], 0, neighbours, regularisation_parameter, iterations)
-    return outputData
-
-#*********************Inpainting WITH****************************#
-#***************Nonlinear (Isotropic) Diffusion******************#
-#****************************************************************#
-def NDF_INPAINT_CPU(inputData, maskData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type):
-    if inputData.ndim == 2:
-        return NDF_INP_2D(inputData, maskData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type)
-    elif inputData.ndim == 3:
-        return NDF_INP_3D(inputData, maskData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type)
-
-def NDF_INP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     np.ndarray[np.uint8_t, ndim=2, mode="c"] maskData,
-                     float regularisation_parameter,
-                     float edge_parameter,
-                     int iterationsNumb,
-                     float time_marching_parameter,
-                     int penalty_type):
-
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-
-
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')
-    
-    # Run Inpaiting by Diffusion iterations for 2D data 
-    Diffusion_Inpaint_CPU_main(&inputData[0,0], &maskData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[1], dims[0], 1)
-    return outputData
-            
-def NDF_INP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     np.ndarray[np.uint8_t, ndim=3, mode="c"] maskData,
-                     float regularisation_parameter,
-                     float edge_parameter,
-                     int iterationsNumb,
-                     float time_marching_parameter,
-                     int penalty_type):
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
-    
-    # Run Inpaiting by Diffusion iterations for 3D data 
-    Diffusion_Inpaint_CPU_main(&inputData[0,0,0], &maskData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[2], dims[1], dims[0])
-
-    return outputData
-#*********************Inpainting WITH****************************#
-#***************Nonlocal Vertical Marching method****************#
-#****************************************************************#
-def NVM_INPAINT_CPU(inputData, maskData, SW_increment, iterationsNumb):
-    if inputData.ndim == 2:
-        return NVM_INP_2D(inputData, maskData, SW_increment, iterationsNumb)
-    elif inputData.ndim == 3:
-        return 
-
-def NVM_INP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-               np.ndarray[np.uint8_t, ndim=2, mode="c"] maskData,
-                     int SW_increment,
-                     int iterationsNumb):
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')   
-    
-    cdef np.ndarray[np.uint8_t, ndim=2, mode="c"] maskData_upd = \
-            np.zeros([dims[0],dims[1]], dtype='uint8')
-    
-    # Run Inpaiting by Nonlocal vertical marching method for 2D data 
-    NonlocalMarching_Inpaint_main(&inputData[0,0], &maskData[0,0], &outputData[0,0], 
-                                  &maskData_upd[0,0],
-                                  SW_increment, iterationsNumb, 1, dims[1], dims[0], 1)
-    
-    return (outputData, maskData_upd)
-
-
-#****************************************************************#
-#***************Calculation of TV-energy functional**************#
-#****************************************************************#
-def TV_ENERGY(inputData, inputData0, regularisation_parameter, typeFunctional):
-    if inputData.ndim == 2:
-        return TV_ENERGY_2D(inputData, inputData0, regularisation_parameter, typeFunctional)
-    elif inputData.ndim == 3:
-        return TV_ENERGY_3D(inputData, inputData0, regularisation_parameter, typeFunctional)
-
-def TV_ENERGY_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                 np.ndarray[np.float32_t, ndim=2, mode="c"] inputData0, 
-                     float regularisation_parameter,
-                     int typeFunctional):
-    
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    
-    cdef np.ndarray[np.float32_t, ndim=1, mode="c"] outputData = \
-            np.zeros([1], dtype='float32')
-                   
-    # run function    
-    TV_energy2D(&inputData[0,0], &inputData0[0,0], &outputData[0], regularisation_parameter, typeFunctional, dims[1], dims[0])
-    
-    return outputData
-            
-def TV_ENERGY_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
-                 np.ndarray[np.float32_t, ndim=3, mode="c"] inputData0, 
-                     float regularisation_parameter,
-                     int typeFunctional):
-						 
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    
-    cdef np.ndarray[np.float32_t, ndim=1, mode="c"] outputData = \
-            np.zeros([1], dtype='float32')
-           
-    # Run function
-    TV_energy3D(&inputData[0,0,0], &inputData0[0,0,0], &outputData[0], regularisation_parameter, typeFunctional, dims[2], dims[1], dims[0])
-
-    return outputData
diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx
deleted file mode 100644
index b52f669..0000000
--- a/Wrappers/Python/src/gpu_regularisers.pyx
+++ /dev/null
@@ -1,640 +0,0 @@
-# distutils: language=c++
-"""
-Copyright 2018 CCPi
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
-Author: Edoardo Pasca, Daniil Kazantsev
-"""
-
-import cython
-import numpy as np
-cimport numpy as np
-
-CUDAErrorMessage = 'CUDA error'
-
-cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
-cdef extern int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z);
-cdef extern int TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z);
-cdef extern int TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
-cdef extern int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
-cdef extern int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
-cdef extern int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z);
-cdef extern int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
-cdef extern int PatchSelect_GPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
-
-# Total-variation Rudin-Osher-Fatemi (ROF)
-def TV_ROF_GPU(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     time_marching_parameter):
-    if inputData.ndim == 2:
-        return ROFTV2D(inputData, 
-                     regularisation_parameter,
-                     iterations,
-                     time_marching_parameter)
-    elif inputData.ndim == 3:
-        return ROFTV3D(inputData, 
-                     regularisation_parameter,
-                     iterations, 
-                     time_marching_parameter)
-                     
-# Total-variation Fast-Gradient-Projection (FGP)
-def TV_FGP_GPU(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     methodTV,
-                     nonneg,
-                     printM):
-    if inputData.ndim == 2:
-        return FGPTV2D(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     methodTV,
-                     nonneg,
-                     printM)
-    elif inputData.ndim == 3:
-        return FGPTV3D(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     methodTV,
-                     nonneg,
-                     printM)
-# Total-variation Split Bregman (SB)
-def TV_SB_GPU(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     methodTV,
-                     printM):
-    if inputData.ndim == 2:
-        return SBTV2D(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     methodTV,
-                     printM)
-    elif inputData.ndim == 3:
-        return SBTV3D(inputData,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     methodTV,
-                     printM)
-# LLT-ROF model
-def LLT_ROF_GPU(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter):
-    if inputData.ndim == 2:
-        return LLT_ROF_GPU2D(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
-    elif inputData.ndim == 3:
-        return LLT_ROF_GPU3D(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
-# Total Generilised Variation (TGV)
-def TGV_GPU(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst):
-    if inputData.ndim == 2:
-        return TGV2D(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst)
-    elif inputData.ndim == 3:
-        return TGV3D(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst)
-# Directional Total-variation Fast-Gradient-Projection (FGP)
-def dTV_FGP_GPU(inputData,
-                     refdata,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     eta_const,
-                     methodTV,
-                     nonneg,
-                     printM):
-    if inputData.ndim == 2:
-        return FGPdTV2D(inputData,
-                     refdata,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     eta_const,
-                     methodTV,
-                     nonneg,
-                     printM)
-    elif inputData.ndim == 3:
-        return FGPdTV3D(inputData,
-                     refdata,
-                     regularisation_parameter,
-                     iterations, 
-                     tolerance_param,
-                     eta_const,
-                     methodTV,
-                     nonneg,
-                     printM)
-# Nonlocal Isotropic Diffusion (NDF)
-def NDF_GPU(inputData,
-                     regularisation_parameter,
-                     edge_parameter,
-                     iterations, 
-                     time_marching_parameter,
-                     penalty_type):
-    if inputData.ndim == 2:
-        return NDF_GPU_2D(inputData,
-                     regularisation_parameter,
-                     edge_parameter,
-                     iterations, 
-                     time_marching_parameter,
-                     penalty_type)
-    elif inputData.ndim == 3:
-        return NDF_GPU_3D(inputData,
-                     regularisation_parameter,
-                     edge_parameter,
-                     iterations, 
-                     time_marching_parameter,
-                     penalty_type)
-# Anisotropic Fourth-Order diffusion
-def Diff4th_GPU(inputData,
-                     regularisation_parameter,
-                     edge_parameter,
-                     iterations, 
-                     time_marching_parameter):
-    if inputData.ndim == 2:
-        return Diff4th_2D(inputData,
-                     regularisation_parameter,
-                     edge_parameter,
-                     iterations, 
-                     time_marching_parameter)
-    elif inputData.ndim == 3:
-        return Diff4th_3D(inputData,
-                     regularisation_parameter,
-                     edge_parameter,
-                     iterations, 
-                     time_marching_parameter)
-                     
-#****************************************************************#
-#********************** Total-variation ROF *********************#
-#****************************************************************#
-def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     int iterations, 
-                     float time_marching_parameter):
-    
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-		    np.zeros([dims[0],dims[1]], dtype='float32')
-          
-    # Running CUDA code here
-    if (TV_ROF_GPU_main(
-            &inputData[0,0], &outputData[0,0], 
-                       regularisation_parameter,
-                       iterations , 
-                       time_marching_parameter, 
-                       dims[1], dims[0], 1)==0):
-        return outputData;
-    else:
-        raise ValueError(CUDAErrorMessage);
-    
-def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     int iterations, 
-                     float time_marching_parameter):
-    
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
-          
-    # Running CUDA code here    
-    if (TV_ROF_GPU_main(
-            &inputData[0,0,0], &outputData[0,0,0], 
-                       regularisation_parameter,
-                       iterations , 
-                       time_marching_parameter, 
-                       dims[2], dims[1], dims[0])==0):
-        return outputData;
-    else:
-        raise ValueError(CUDAErrorMessage);
-#****************************************************************#
-#********************** Total-variation FGP *********************#
-#****************************************************************#
-#******** Total-variation Fast-Gradient-Projection (FGP)*********#
-def FGPTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     int iterations, 
-                     float tolerance_param,
-                     int methodTV,
-                     int nonneg,
-                     int printM):
-    
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-		    np.zeros([dims[0],dims[1]], dtype='float32')
-          
-    # Running CUDA code here    
-    if (TV_FGP_GPU_main(&inputData[0,0], &outputData[0,0],
-                       regularisation_parameter, 
-                       iterations, 
-                       tolerance_param,
-                       methodTV,
-                       nonneg,
-                       printM,
-                       dims[1], dims[0], 1)==0):
-        return outputData;
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-    
-def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     int iterations, 
-                     float tolerance_param,
-                     int methodTV,
-                     int nonneg,
-                     int printM):
-    
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
-          
-    # Running CUDA code here    
-    if (TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0],
-                       regularisation_parameter , 
-                       iterations, 
-                       tolerance_param,
-                       methodTV,
-                       nonneg,
-                       printM,
-                       dims[2], dims[1], dims[0])==0):
-        return outputData;
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-#***************************************************************#
-#********************** Total-variation SB *********************#
-#***************************************************************#
-#*************** Total-variation Split Bregman (SB)*************#
-def SBTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     int iterations, 
-                     float tolerance_param,
-                     int methodTV,
-                     int printM):
-    
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-		    np.zeros([dims[0],dims[1]], dtype='float32')
-          
-    # Running CUDA code here    
-    if (TV_SB_GPU_main(&inputData[0,0], &outputData[0,0],
-                       regularisation_parameter, 
-                       iterations, 
-                       tolerance_param,
-                       methodTV,
-                       printM,
-                       dims[1], dims[0], 1)==0):
-        return outputData;
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-    
-def SBTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     int iterations, 
-                     float tolerance_param,
-                     int methodTV,
-                     int printM):
-    
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
-          
-    # Running CUDA code here    
-    if (TV_SB_GPU_main(&inputData[0,0,0], &outputData[0,0,0],
-                       regularisation_parameter , 
-                       iterations, 
-                       tolerance_param,
-                       methodTV,
-                       printM,
-                       dims[2], dims[1], dims[0])==0):
-        return outputData;
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-
-#***************************************************************#
-#************************ LLT-ROF model ************************#
-#***************************************************************#
-#************Joint LLT-ROF model for higher order **************#
-def LLT_ROF_GPU2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameterROF,
-                     float regularisation_parameterLLT,
-                     int iterations, 
-                     float time_marching_parameter):
-    
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-		    np.zeros([dims[0],dims[1]], dtype='float32')
-          
-    # Running CUDA code here    
-    if (LLT_ROF_GPU_main(&inputData[0,0], &outputData[0,0],regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[1],dims[0],1)==0):
-        return outputData;
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-    
-def LLT_ROF_GPU3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameterROF,
-                     float regularisation_parameterLLT,
-                     int iterations, 
-                     float time_marching_parameter):
-    
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
-          
-    # Running CUDA code here    
-    if (LLT_ROF_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[2], dims[1], dims[0])==0):
-        return outputData;
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-
-#***************************************************************#
-#***************** Total Generalised Variation *****************#
-#***************************************************************#
-def TGV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     float alpha1,
-                     float alpha0,
-                     int iterationsNumb, 
-                     float LipshitzConst):
-                         
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')
-                   
-    #/* Run TGV iterations for 2D data */
-    if (TGV_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter,
-                       alpha1,
-                       alpha0,
-                       iterationsNumb, 
-                       LipshitzConst,
-                       dims[1],dims[0], 1)==0):
-        return outputData
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-def TGV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     float alpha1,
-                     float alpha0,
-                     int iterationsNumb, 
-                     float LipshitzConst):
-    
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
-          
-    # Running CUDA code here    
-    if (TGV_GPU_main(
-            &inputData[0,0,0], &outputData[0,0,0], regularisation_parameter,
-                       alpha1,
-                       alpha0,
-                       iterationsNumb, 
-                       LipshitzConst,
-                       dims[2], dims[1], dims[0])==0):
-        return outputData;
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-
-#****************************************************************#
-#**************Directional Total-variation FGP ******************#
-#****************************************************************#
-#******** Directional TV Fast-Gradient-Projection (FGP)*********#
-def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-             np.ndarray[np.float32_t, ndim=2, mode="c"] refdata,
-                     float regularisation_parameter,
-                     int iterations, 
-                     float tolerance_param,
-                     float eta_const,
-                     int methodTV,
-                     int nonneg,
-                     int printM):
-    
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-		    np.zeros([dims[0],dims[1]], dtype='float32')
-          
-    # Running CUDA code here    
-    if (dTV_FGP_GPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0],
-                       regularisation_parameter, 
-                       iterations, 
-                       tolerance_param,
-                       eta_const,
-                       methodTV,
-                       nonneg,
-                       printM,
-                       dims[1], dims[0], 1)==0):
-        return outputData
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-    
-def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-             np.ndarray[np.float32_t, ndim=3, mode="c"] refdata, 
-                     float regularisation_parameter,
-                     int iterations, 
-                     float tolerance_param,
-                     float eta_const,
-                     int methodTV,
-                     int nonneg,
-                     int printM):
-    
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
-          
-    # Running CUDA code here    
-    if (dTV_FGP_GPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0],
-                       regularisation_parameter , 
-                       iterations, 
-                       tolerance_param,
-                       eta_const,
-                       methodTV,
-                       nonneg,
-                       printM,
-                       dims[2], dims[1], dims[0])==0):
-        return outputData;
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-
-#****************************************************************#
-#***************Nonlinear (Isotropic) Diffusion******************#
-#****************************************************************#
-def NDF_GPU_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     float edge_parameter,
-                     int iterationsNumb,                     
-                     float time_marching_parameter,
-                     int penalty_type):
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')
-    
-    #rangecheck = penalty_type < 1 and penalty_type > 3
-    #if not rangecheck:
-#        raise ValueError('Choose penalty type as 1 for Huber, 2 - Perona-Malik, 3 - Tukey Biweight')
-    
-    # Run Nonlinear Diffusion iterations for 2D data 
-    # Running CUDA code here  
-    if (NonlDiff_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[1], dims[0], 1)==0):
-        return outputData;
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-            
-def NDF_GPU_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     float edge_parameter,
-                     int iterationsNumb,                     
-                     float time_marching_parameter,
-                     int penalty_type):
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1],dims[2]], dtype='float32')    
-       
-    # Run Nonlinear Diffusion iterations for  3D data 
-    # Running CUDA code here  
-    if (NonlDiff_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[2], dims[1], dims[0])==0):
-        return outputData;
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-#****************************************************************#
-#************Anisotropic Fourth-Order diffusion******************#
-#****************************************************************#
-def Diff4th_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     float edge_parameter,
-                     int iterationsNumb,
-                     float time_marching_parameter):
-    cdef long dims[2]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    
-    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1]], dtype='float32')
-    
-    # Run Anisotropic Fourth-Order diffusion for 2D data 
-    # Running CUDA code here  
-    if (Diffus4th_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[1], dims[0], 1)==0):
-        return outputData
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-            
-def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
-                     float regularisation_parameter,
-                     float edge_parameter,
-                     int iterationsNumb,
-                     float time_marching_parameter):
-    cdef long dims[3]
-    dims[0] = inputData.shape[0]
-    dims[1] = inputData.shape[1]
-    dims[2] = inputData.shape[2]
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
-            np.zeros([dims[0],dims[1],dims[2]], dtype='float32')    
-       
-    # Run Anisotropic Fourth-Order diffusion for  3D data 
-    # Running CUDA code here  
-    if (Diffus4th_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[2], dims[1], dims[0])==0):
-        return outputData;
-    else:
-        raise ValueError(CUDAErrorMessage);
-
-#****************************************************************#
-#************Patch-based weights pre-selection******************#
-#****************************************************************#
-def PATCHSEL_GPU(inputData, searchwindow, patchwindow, neighbours, edge_parameter):
-    if inputData.ndim == 2:
-        return PatchSel_2D(inputData, searchwindow, patchwindow, neighbours, edge_parameter)
-    elif inputData.ndim == 3:
-        return 1
-def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
-                     int searchwindow,
-                     int patchwindow,
-                     int neighbours,
-                     float edge_parameter):
-    cdef long dims[3]
-    dims[0] = neighbours
-    dims[1] = inputData.shape[0]
-    dims[2] = inputData.shape[1]    
-    
-    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] Weights = \
-            np.zeros([dims[0], dims[1],dims[2]], dtype='float32')
-    
-    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_i = \
-            np.zeros([dims[0], dims[1],dims[2]], dtype='uint16')
-            
-    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_j = \
-            np.zeros([dims[0], dims[1],dims[2]], dtype='uint16')
-
-    # Run patch-based weight selection function
-    if (PatchSelect_GPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], searchwindow, patchwindow,  neighbours,  edge_parameter)==0):
-        return H_i, H_j, Weights;
-    else:
-        raise ValueError(CUDAErrorMessage);
-
diff --git a/data/SinoInpaint.mat b/data/SinoInpaint.mat
deleted file mode 100644
index d748fb4..0000000
Binary files a/data/SinoInpaint.mat and /dev/null differ
diff --git a/data/lena_gray_512.tif b/data/lena_gray_512.tif
deleted file mode 100644
index f80cafc..0000000
Binary files a/data/lena_gray_512.tif and /dev/null differ
diff --git a/recipes/regularisers/bld.bat b/recipes/regularisers/bld.bat
deleted file mode 100644
index 43a5286..0000000
--- a/recipes/regularisers/bld.bat
+++ /dev/null
@@ -1,21 +0,0 @@
-IF NOT DEFINED CIL_VERSION (
-ECHO CIL_VERSION Not Defined.
-exit 1
-)
-
-mkdir "%SRC_DIR%\build"
-ROBOCOPY /E "%RECIPE_DIR%\..\..\Core" "%SRC_DIR%\build"
-::ROBOCOPY /E "%RECIPE_DIR%\..\..\Wrappers\python\src" "%SRC_DIR%\build\module"
-cd "%SRC_DIR%\build"
-
-echo "we should be in %SRC_DIR%\build"
-
-cmake -G "NMake Makefiles" "%RECIPE_DIR%\..\..\" -DLIBRARY_LIB="%CONDA_PREFIX%\lib" -DLIBRARY_INC="%CONDA_PREFIX%" -DCMAKE_INSTALL_PREFIX="%PREFIX%\Library" -DCONDA_BUILD=ON -DBUILD_WRAPPERS=OFF
-
-::-DBOOST_LIBRARYDIR="%CONDA_PREFIX%\Library\lib" -DBOOST_INCLUDEDIR="%CONDA_PREFIX%\Library\include" -DBOOST_ROOT="%CONDA_PREFIX%\Library\lib"
-
-:: Build C library
-nmake install
-if errorlevel 1 exit 1
-
-:: Install step
diff --git a/recipes/regularisers/build.sh b/recipes/regularisers/build.sh
deleted file mode 100644
index eaa778e..0000000
--- a/recipes/regularisers/build.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env bash
-
-echo build.sh CIL_VERSION: $CIL_VERSION
-#if [ -z "$CIL_VERSION" ]; then
-#    echo "Need to set CIL_VERSION"
-#    exit 1
-#fi  
-#export CIL_VERSION=0.9.1
-
-
-
-mkdir ${SRC_DIR}/build
-cp -rv ${RECIPE_DIR}/../../Core/ ${SRC_DIR}/build
-mkdir ${SRC_DIR}/build/build
-cd ${SRC_DIR}/build/build
-cmake -G "Unix Makefiles" -DLIBRARY_LIB="${CONDA_PREFIX}/lib" -DLIBRARY_INC="${CONDA_PREFIX}" -DCMAKE_INSTALL_PREFIX="${PREFIX}" ../Core
-
-make -j2 VERBOSE=1
-make install
diff --git a/recipes/regularisers/meta.yaml b/recipes/regularisers/meta.yaml
deleted file mode 100644
index 3ffcd1d..0000000
--- a/recipes/regularisers/meta.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-package:
-  name: cil_regulariser
-  version: {{ environ['CIL_VERSION'] }}
-
-
-build:
-  preserve_egg_dir: False
-  script_env: 
-    - CIL_VERSION
-
-requirements:
-  build:
-    - cmake >=3.1
-    - vc 14 # [win and py36] 
-    - vc 14 # [win and py35] 
-    - vc 9  # [win and py27]
-
-  run:
-    - vc 14 # [win and py36]
-    - vc 14 # [win and py35]
-    - vc 9  # [win and py27]
-
-
-about:
-  home: http://www.ccpi.ac.uk
-  license: Apache v2.0
-  summary: Regulariser package from CCPi
diff --git a/run.sh b/run.sh
deleted file mode 100644
index a8e5555..0000000
--- a/run.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash  
-echo "Building CCPi-regularisation Toolkit using CMake"  
-# rm -r build
-# Requires Cython, install it first: 
-# pip install cython
-# mkdir build
-cd build/
-make clean
-# install Python modules only without CUDA
-cmake ../ -DBUILD_PYTHON_WRAPPER=ON -DBUILD_MATLAB_WRAPPER=OFF -DBUILD_CUDA=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install
-# install Python modules only with CUDA
-# cmake ../ -DBUILD_PYTHON_WRAPPER=ON -DBUILD_MATLAB_WRAPPER=OFF -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install
-make install
-# cp install/lib/libcilreg.so install/python/ccpi/filters
-cd install/python
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../lib
-# spyder
-# one can also run Matlab in Linux as:
-# PATH="/path/to/mex/:$PATH" LD_LIBRARY_PATH="/path/to/library:$LD_LIBRARY_PATH" matlab
-- 
cgit v1.2.3


From 61bfe1f57fbda958e24e227e567676fafd7f6d3e Mon Sep 17 00:00:00 2001
From: Tomas Kulhanek <tomas.kulhanek@stfc.ac.uk>
Date: Thu, 21 Feb 2019 02:11:13 -0500
Subject: restructured sources

---
 Wrappers/Python/conda-recipe/lena_gray_512.tif     | Bin 262598 -> 0 bytes
 build/FindAnacondaEnvironment.cmake                | 154 ++++
 build/run.sh                                       |  19 +
 docs/data/SinoInpaint.mat                          | Bin 0 -> 3335061 bytes
 docs/data/lena_gray_512.tif                        | Bin 0 -> 262598 bytes
 docs/demos/demoMatlab_3Ddenoise.m                  | 178 +++++
 docs/demos/demoMatlab_denoise.m                    | 189 +++++
 docs/demos/demoMatlab_inpaint.m                    |  35 +
 docs/demos/demo_cpu_inpainters.py                  | 192 +++++
 docs/demos/demo_cpu_regularisers.py                | 572 ++++++++++++++
 docs/demos/demo_cpu_regularisers3D.py              | 458 ++++++++++++
 docs/demos/demo_cpu_vs_gpu_regularisers.py         | 790 ++++++++++++++++++++
 docs/demos/demo_gpu_regularisers.py                | 518 +++++++++++++
 docs/demos/demo_gpu_regularisers3D.py              | 460 ++++++++++++
 docs/demos/qualitymetrics.py                       |  18 +
 recipe/bld.bat                                     |  20 +
 recipe/build.sh                                    |  18 +
 recipe/conda_build_config.yaml                     |   9 +
 recipe/meta.yaml                                   |  40 +
 recipe/run_test.py                                 | 819 +++++++++++++++++++++
 src/CMakeLists.txt                                 |  19 +
 src/Core/CCPiDefines.h                             |  35 +
 src/Core/CMakeLists.txt                            | 151 ++++
 src/Core/inpainters_CPU/Diffusion_Inpaint_core.c   | 322 ++++++++
 src/Core/inpainters_CPU/Diffusion_Inpaint_core.h   |  61 ++
 .../inpainters_CPU/NonlocalMarching_Inpaint_core.c | 188 +++++
 .../inpainters_CPU/NonlocalMarching_Inpaint_core.h |  54 ++
 src/Core/regularisers_CPU/Diffus4th_order_core.c   | 250 +++++++
 src/Core/regularisers_CPU/Diffus4th_order_core.h   |  55 ++
 src/Core/regularisers_CPU/Diffusion_core.c         | 307 ++++++++
 src/Core/regularisers_CPU/Diffusion_core.h         |  59 ++
 src/Core/regularisers_CPU/FGP_TV_core.c            | 321 ++++++++
 src/Core/regularisers_CPU/FGP_TV_core.h            |  63 ++
 src/Core/regularisers_CPU/FGP_dTV_core.c           | 441 +++++++++++
 src/Core/regularisers_CPU/FGP_dTV_core.h           |  72 ++
 src/Core/regularisers_CPU/LLT_ROF_core.c           | 410 +++++++++++
 src/Core/regularisers_CPU/LLT_ROF_core.h           |  65 ++
 src/Core/regularisers_CPU/Nonlocal_TV_core.c       | 173 +++++
 src/Core/regularisers_CPU/Nonlocal_TV_core.h       |  61 ++
 src/Core/regularisers_CPU/PatchSelect_core.c       | 345 +++++++++
 src/Core/regularisers_CPU/PatchSelect_core.h       |  63 ++
 src/Core/regularisers_CPU/ROF_TV_core.c            | 289 ++++++++
 src/Core/regularisers_CPU/ROF_TV_core.h            |  57 ++
 src/Core/regularisers_CPU/SB_TV_core.c             | 368 +++++++++
 src/Core/regularisers_CPU/SB_TV_core.h             |  61 ++
 src/Core/regularisers_CPU/TGV_core.c               | 487 ++++++++++++
 src/Core/regularisers_CPU/TGV_core.h               |  73 ++
 src/Core/regularisers_CPU/TNV_core.c               | 452 ++++++++++++
 src/Core/regularisers_CPU/TNV_core.h               |  47 ++
 src/Core/regularisers_CPU/utils.c                  | 117 +++
 src/Core/regularisers_CPU/utils.h                  |  34 +
 src/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu  | 268 +++++++
 src/Core/regularisers_GPU/Diffus_4thO_GPU_core.h   |   8 +
 src/Core/regularisers_GPU/LLT_ROF_GPU_core.cu      | 473 ++++++++++++
 src/Core/regularisers_GPU/LLT_ROF_GPU_core.h       |   8 +
 src/Core/regularisers_GPU/NonlDiff_GPU_core.cu     | 345 +++++++++
 src/Core/regularisers_GPU/NonlDiff_GPU_core.h      |   8 +
 src/Core/regularisers_GPU/PatchSelect_GPU_core.cu  | 460 ++++++++++++
 src/Core/regularisers_GPU/PatchSelect_GPU_core.h   |   8 +
 src/Core/regularisers_GPU/TGV_GPU_core.cu          | 625 ++++++++++++++++
 src/Core/regularisers_GPU/TGV_GPU_core.h           |   8 +
 src/Core/regularisers_GPU/TV_FGP_GPU_core.cu       | 564 ++++++++++++++
 src/Core/regularisers_GPU/TV_FGP_GPU_core.h        |   9 +
 src/Core/regularisers_GPU/TV_ROF_GPU_core.cu       | 358 +++++++++
 src/Core/regularisers_GPU/TV_ROF_GPU_core.h        |   8 +
 src/Core/regularisers_GPU/TV_SB_GPU_core.cu        | 552 ++++++++++++++
 src/Core/regularisers_GPU/TV_SB_GPU_core.h         |  10 +
 src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu      | 741 +++++++++++++++++++
 src/Core/regularisers_GPU/dTV_FGP_GPU_core.h       |   9 +
 src/Core/regularisers_GPU/shared.h                 |  42 ++
 src/Matlab/CMakeLists.txt                          | 147 ++++
 src/Matlab/mex_compile/compileCPU_mex_Linux.m      |  81 ++
 src/Matlab/mex_compile/compileCPU_mex_WINDOWS.m    | 135 ++++
 src/Matlab/mex_compile/compileGPU_mex.m            |  74 ++
 .../mex_compile/installed/MEXed_files_location.txt |   0
 .../mex_compile/regularisers_CPU/Diffusion_4thO.c  |  77 ++
 src/Matlab/mex_compile/regularisers_CPU/FGP_TV.c   |  97 +++
 src/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c  | 114 +++
 src/Matlab/mex_compile/regularisers_CPU/LLT_ROF.c  |  82 +++
 src/Matlab/mex_compile/regularisers_CPU/NonlDiff.c |  89 +++
 .../mex_compile/regularisers_CPU/NonlDiff_Inp.c    | 103 +++
 .../regularisers_CPU/NonlocalMarching_Inpaint.c    |  84 +++
 .../mex_compile/regularisers_CPU/Nonlocal_TV.c     |  88 +++
 .../mex_compile/regularisers_CPU/PatchSelect.c     |  92 +++
 src/Matlab/mex_compile/regularisers_CPU/ROF_TV.c   |  77 ++
 src/Matlab/mex_compile/regularisers_CPU/SB_TV.c    |  91 +++
 src/Matlab/mex_compile/regularisers_CPU/TGV.c      |  83 +++
 src/Matlab/mex_compile/regularisers_CPU/TNV.c      |  74 ++
 .../mex_compile/regularisers_CPU/TV_energy.c       |  72 ++
 .../regularisers_GPU/Diffusion_4thO_GPU.cpp        |  77 ++
 .../mex_compile/regularisers_GPU/FGP_TV_GPU.cpp    |  97 +++
 .../mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp   | 113 +++
 .../mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp   |  83 +++
 .../mex_compile/regularisers_GPU/NonlDiff_GPU.cpp  |  92 +++
 .../mex_compile/regularisers_GPU/ROF_TV_GPU.cpp    |  74 ++
 .../mex_compile/regularisers_GPU/SB_TV_GPU.cpp     |  91 +++
 .../mex_compile/regularisers_GPU/TGV_GPU.cpp       |  79 ++
 src/Matlab/supp/RMSE.m                             |   7 +
 src/Matlab/supp/my_red_yellowMAP.mat               | Bin 0 -> 1761 bytes
 src/Python/CMakeLists.txt                          | 141 ++++
 src/Python/ccpi/__init__.py                        |   0
 src/Python/ccpi/filters/__init__.py                |   0
 src/Python/ccpi/filters/regularisers.py            | 214 ++++++
 src/Python/setup-regularisers.py.in                |  75 ++
 src/Python/src/cpu_regularisers.pyx                | 685 +++++++++++++++++
 src/Python/src/gpu_regularisers.pyx                | 640 ++++++++++++++++
 test/lena_gray_512.tif                             | Bin 0 -> 262598 bytes
 test/test_ROF_TV.py                                | 127 ++++
 test/testroutines.py                               |  37 +
 109 files changed, 18785 insertions(+)
 delete mode 100644 Wrappers/Python/conda-recipe/lena_gray_512.tif
 create mode 100644 build/FindAnacondaEnvironment.cmake
 create mode 100644 build/run.sh
 create mode 100644 docs/data/SinoInpaint.mat
 create mode 100644 docs/data/lena_gray_512.tif
 create mode 100644 docs/demos/demoMatlab_3Ddenoise.m
 create mode 100644 docs/demos/demoMatlab_denoise.m
 create mode 100644 docs/demos/demoMatlab_inpaint.m
 create mode 100644 docs/demos/demo_cpu_inpainters.py
 create mode 100644 docs/demos/demo_cpu_regularisers.py
 create mode 100644 docs/demos/demo_cpu_regularisers3D.py
 create mode 100644 docs/demos/demo_cpu_vs_gpu_regularisers.py
 create mode 100644 docs/demos/demo_gpu_regularisers.py
 create mode 100644 docs/demos/demo_gpu_regularisers3D.py
 create mode 100644 docs/demos/qualitymetrics.py
 create mode 100644 recipe/bld.bat
 create mode 100644 recipe/build.sh
 create mode 100644 recipe/conda_build_config.yaml
 create mode 100644 recipe/meta.yaml
 create mode 100755 recipe/run_test.py
 create mode 100644 src/CMakeLists.txt
 create mode 100644 src/Core/CCPiDefines.h
 create mode 100644 src/Core/CMakeLists.txt
 create mode 100644 src/Core/inpainters_CPU/Diffusion_Inpaint_core.c
 create mode 100644 src/Core/inpainters_CPU/Diffusion_Inpaint_core.h
 create mode 100644 src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c
 create mode 100644 src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.h
 create mode 100644 src/Core/regularisers_CPU/Diffus4th_order_core.c
 create mode 100644 src/Core/regularisers_CPU/Diffus4th_order_core.h
 create mode 100644 src/Core/regularisers_CPU/Diffusion_core.c
 create mode 100644 src/Core/regularisers_CPU/Diffusion_core.h
 create mode 100644 src/Core/regularisers_CPU/FGP_TV_core.c
 create mode 100644 src/Core/regularisers_CPU/FGP_TV_core.h
 create mode 100644 src/Core/regularisers_CPU/FGP_dTV_core.c
 create mode 100644 src/Core/regularisers_CPU/FGP_dTV_core.h
 create mode 100644 src/Core/regularisers_CPU/LLT_ROF_core.c
 create mode 100644 src/Core/regularisers_CPU/LLT_ROF_core.h
 create mode 100644 src/Core/regularisers_CPU/Nonlocal_TV_core.c
 create mode 100644 src/Core/regularisers_CPU/Nonlocal_TV_core.h
 create mode 100644 src/Core/regularisers_CPU/PatchSelect_core.c
 create mode 100644 src/Core/regularisers_CPU/PatchSelect_core.h
 create mode 100644 src/Core/regularisers_CPU/ROF_TV_core.c
 create mode 100644 src/Core/regularisers_CPU/ROF_TV_core.h
 create mode 100755 src/Core/regularisers_CPU/SB_TV_core.c
 create mode 100644 src/Core/regularisers_CPU/SB_TV_core.h
 create mode 100644 src/Core/regularisers_CPU/TGV_core.c
 create mode 100644 src/Core/regularisers_CPU/TGV_core.h
 create mode 100755 src/Core/regularisers_CPU/TNV_core.c
 create mode 100644 src/Core/regularisers_CPU/TNV_core.h
 create mode 100644 src/Core/regularisers_CPU/utils.c
 create mode 100644 src/Core/regularisers_CPU/utils.h
 create mode 100644 src/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
 create mode 100644 src/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
 create mode 100644 src/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
 create mode 100644 src/Core/regularisers_GPU/LLT_ROF_GPU_core.h
 create mode 100644 src/Core/regularisers_GPU/NonlDiff_GPU_core.cu
 create mode 100644 src/Core/regularisers_GPU/NonlDiff_GPU_core.h
 create mode 100644 src/Core/regularisers_GPU/PatchSelect_GPU_core.cu
 create mode 100644 src/Core/regularisers_GPU/PatchSelect_GPU_core.h
 create mode 100644 src/Core/regularisers_GPU/TGV_GPU_core.cu
 create mode 100644 src/Core/regularisers_GPU/TGV_GPU_core.h
 create mode 100755 src/Core/regularisers_GPU/TV_FGP_GPU_core.cu
 create mode 100755 src/Core/regularisers_GPU/TV_FGP_GPU_core.h
 create mode 100755 src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
 create mode 100755 src/Core/regularisers_GPU/TV_ROF_GPU_core.h
 create mode 100755 src/Core/regularisers_GPU/TV_SB_GPU_core.cu
 create mode 100755 src/Core/regularisers_GPU/TV_SB_GPU_core.h
 create mode 100644 src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
 create mode 100644 src/Core/regularisers_GPU/dTV_FGP_GPU_core.h
 create mode 100644 src/Core/regularisers_GPU/shared.h
 create mode 100755 src/Matlab/CMakeLists.txt
 create mode 100644 src/Matlab/mex_compile/compileCPU_mex_Linux.m
 create mode 100644 src/Matlab/mex_compile/compileCPU_mex_WINDOWS.m
 create mode 100644 src/Matlab/mex_compile/compileGPU_mex.m
 create mode 100644 src/Matlab/mex_compile/installed/MEXed_files_location.txt
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/Diffusion_4thO.c
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/FGP_TV.c
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/LLT_ROF.c
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/NonlDiff.c
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/NonlDiff_Inp.c
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/NonlocalMarching_Inpaint.c
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/ROF_TV.c
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/SB_TV.c
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/TGV.c
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/TNV.c
 create mode 100644 src/Matlab/mex_compile/regularisers_CPU/TV_energy.c
 create mode 100644 src/Matlab/mex_compile/regularisers_GPU/Diffusion_4thO_GPU.cpp
 create mode 100644 src/Matlab/mex_compile/regularisers_GPU/FGP_TV_GPU.cpp
 create mode 100644 src/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp
 create mode 100644 src/Matlab/mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp
 create mode 100644 src/Matlab/mex_compile/regularisers_GPU/NonlDiff_GPU.cpp
 create mode 100644 src/Matlab/mex_compile/regularisers_GPU/ROF_TV_GPU.cpp
 create mode 100644 src/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp
 create mode 100644 src/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp
 create mode 100644 src/Matlab/supp/RMSE.m
 create mode 100644 src/Matlab/supp/my_red_yellowMAP.mat
 create mode 100644 src/Python/CMakeLists.txt
 create mode 100644 src/Python/ccpi/__init__.py
 create mode 100644 src/Python/ccpi/filters/__init__.py
 create mode 100644 src/Python/ccpi/filters/regularisers.py
 create mode 100644 src/Python/setup-regularisers.py.in
 create mode 100644 src/Python/src/cpu_regularisers.pyx
 create mode 100644 src/Python/src/gpu_regularisers.pyx
 create mode 100644 test/lena_gray_512.tif
 create mode 100644 test/test_ROF_TV.py
 create mode 100644 test/testroutines.py

diff --git a/Wrappers/Python/conda-recipe/lena_gray_512.tif b/Wrappers/Python/conda-recipe/lena_gray_512.tif
deleted file mode 100644
index f80cafc..0000000
Binary files a/Wrappers/Python/conda-recipe/lena_gray_512.tif and /dev/null differ
diff --git a/build/FindAnacondaEnvironment.cmake b/build/FindAnacondaEnvironment.cmake
new file mode 100644
index 0000000..6475128
--- /dev/null
+++ b/build/FindAnacondaEnvironment.cmake
@@ -0,0 +1,154 @@
+#   Copyright 2017 Edoardo Pasca
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+# #.rst:
+# FindAnacondaEnvironment
+# --------------
+#
+# Find Python executable and library for a specific Anaconda environment
+#
+# This module finds the Python interpreter for a specific Anaconda enviroment, 
+# if installed and determines where the include files and libraries are.  
+# This code sets the following variables:
+#
+# ::
+#   PYTHONINTERP_FOUND         - if the Python interpret has been found
+#   PYTHON_EXECUTABLE          - the Python interpret found
+#   PYTHON_LIBRARY             - path to the python library
+#   PYTHON_INCLUDE_PATH        - path to where Python.h is found (deprecated)
+#   PYTHON_INCLUDE_DIRS        - path to where Python.h is found
+#   PYTHONLIBS_VERSION_STRING  - version of the Python libs found (since CMake 2.8.8)
+#   PYTHON_VERSION_MAJOR       - major Python version
+#   PYTHON_VERSION_MINOR       - minor Python version
+#   PYTHON_VERSION_PATCH       - patch Python version
+
+
+# findPythonForAnacondaEnvironment(<env>): take the interpreter from the environment root <env>, query its version and export PYTHON_EXECUTABLE, PYTHONINTERP_FOUND, PYTHON_VERSION_{STRING,MAJOR,MINOR,PATCH} and CONDA_EXECUTABLE to the caller's scope.
+function (findPythonForAnacondaEnvironment env)
+	if (WIN32)
+	  file(TO_CMAKE_PATH "${env}/python.exe" PYTHON_EXECUTABLE)
+	elseif (UNIX)
+	  file(TO_CMAKE_PATH "${env}/bin/python" PYTHON_EXECUTABLE)
+	endif()
+	# Version parts are collected in function-local _PYTHON_VERSION_* variables and
+	# exported once at the end; set(... PARENT_SCOPE) does not change the local value.
+	message("findPythonForAnacondaEnvironment Found Python Executable" ${PYTHON_EXECUTABLE})
+	####### FROM FindPythonInterp ########
+	# determine python version string
+	if(PYTHON_EXECUTABLE)
+		execute_process(COMMAND "${PYTHON_EXECUTABLE}" -c
+								"import sys; sys.stdout.write(';'.join([str(x) for x in sys.version_info[:3]]))"
+						OUTPUT_VARIABLE _VERSION
+						RESULT_VARIABLE _PYTHON_VERSION_RESULT
+						ERROR_QUIET)
+		if(NOT _PYTHON_VERSION_RESULT)
+			string(REPLACE ";" "." _PYTHON_VERSION_STRING "${_VERSION}")
+			list(GET _VERSION 0 _PYTHON_VERSION_MAJOR)
+			list(GET _VERSION 1 _PYTHON_VERSION_MINOR)
+			list(GET _VERSION 2 _PYTHON_VERSION_PATCH)
+			if(_PYTHON_VERSION_PATCH EQUAL 0)
+				# it's called "Python 2.7", not "2.7.0"
+				string(REGEX REPLACE "\\.0$" "" _PYTHON_VERSION_STRING "${_PYTHON_VERSION_STRING}")
+			endif()
+		else()
+			# sys.version predates sys.version_info, so use that
+			execute_process(COMMAND "${PYTHON_EXECUTABLE}" -c "import sys; sys.stdout.write(sys.version)"
+							OUTPUT_VARIABLE _VERSION
+							RESULT_VARIABLE _PYTHON_VERSION_RESULT
+							ERROR_QUIET)
+			if(NOT _PYTHON_VERSION_RESULT)
+				string(REGEX REPLACE " .*" "" _PYTHON_VERSION_STRING "${_VERSION}")
+				string(REGEX REPLACE "^([0-9]+)\\.[0-9]+.*" "\\1" _PYTHON_VERSION_MAJOR "${_PYTHON_VERSION_STRING}")
+				string(REGEX REPLACE "^[0-9]+\\.([0-9]+).*" "\\1" _PYTHON_VERSION_MINOR "${_PYTHON_VERSION_STRING}")
+				if(_PYTHON_VERSION_STRING MATCHES "^[0-9]+\\.[0-9]+\\.([0-9]+)")
+					set(_PYTHON_VERSION_PATCH "${CMAKE_MATCH_1}")
+				else()
+					set(_PYTHON_VERSION_PATCH "0")
+				endif()
+			else()
+				# sys.version was first documented for Python 1.5, so assume
+				# this is older. Kept local so the export below does not wipe it.
+				set(_PYTHON_VERSION_STRING "1.4")
+				set(_PYTHON_VERSION_MAJOR "1")
+				set(_PYTHON_VERSION_MINOR "4")
+				set(_PYTHON_VERSION_PATCH "0")
+			endif()
+		endif()
+		unset(_PYTHON_VERSION_RESULT)
+		unset(_VERSION)
+	endif()
+	###############################################
+	# export the results to the caller
+	set (PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE} PARENT_SCOPE)
+	set (PYTHONINTERP_FOUND "ON" PARENT_SCOPE)
+	set (PYTHON_VERSION_STRING ${_PYTHON_VERSION_STRING} PARENT_SCOPE)
+	set (PYTHON_VERSION_MAJOR ${_PYTHON_VERSION_MAJOR} PARENT_SCOPE)
+	set (PYTHON_VERSION_MINOR ${_PYTHON_VERSION_MINOR} PARENT_SCOPE)
+	set (PYTHON_VERSION_PATCH ${_PYTHON_VERSION_PATCH} PARENT_SCOPE)
+	message("My version found " ${_PYTHON_VERSION_STRING})
+	## find conda executable (on Windows the launcher directory is "Scripts")
+	if (WIN32)
+	  set (CONDA_EXECUTABLE ${env}/Scripts/conda PARENT_SCOPE)
+	elseif(UNIX)
+	  set (CONDA_EXECUTABLE ${env}/bin/conda PARENT_SCOPE)
+	endif()
+endfunction()
+
+
+# Module body: runs when the file is included. Also accept Python 3.5 when searching.
+set(Python_ADDITIONAL_VERSIONS 3.5)
+# Locate the default interpreter and report what was found.
+find_package(PythonInterp)
+if (PYTHONINTERP_FOUND)
+  
+  message("Found interpret " ${PYTHON_EXECUTABLE})
+  message("Python Library " ${PYTHON_LIBRARY})
+  message("Python Include Dir " ${PYTHON_INCLUDE_DIR})
+  message("Python Include Path " ${PYTHON_INCLUDE_PATH})
+  
+  foreach(pv ${PYTHON_VERSION_STRING})
+    message("Found interpret " ${pv})
+  endforeach()
+endif()
+
+# FindPythonLibs reports success through PYTHONLIBS_FOUND (plural); testing the
+# singular name never succeeds and always printed "No PythonLibs Found".
+find_package(PythonLibs)
+if (PYTHONLIBS_FOUND) 
+  message("Found PythonLibs PYTHON_LIBRARIES " ${PYTHON_LIBRARIES})
+  message("Found PythonLibs PYTHON_INCLUDE_PATH " ${PYTHON_INCLUDE_PATH})
+  message("Found PythonLibs PYTHON_INCLUDE_DIRS " ${PYTHON_INCLUDE_DIRS})
+  message("Found PythonLibs PYTHONLIBS_VERSION_STRING " ${PYTHONLIBS_VERSION_STRING}  )
+else()
+  message("No PythonLibs Found")  
+endif()
+
+
+
+# findPythonPackagesPath(): ask ${PYTHON_EXECUTABLE} for its package directory and export it to the caller as PYTHON_PACKAGES_PATH.
+function(findPythonPackagesPath)
+   execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "from distutils.sysconfig import *; print (get_python_lib())"
+                      RESULT_VARIABLE PYTHON_CVPY_PROCESS
+                      OUTPUT_VARIABLE PYTHON_STD_PACKAGES_PATH
+                      OUTPUT_STRIP_TRAILING_WHITESPACE)
+   # NOTE(review): PYTHON_CVPY_PROCESS (the exit status) is never checked, and distutils is deprecated (PEP 632).
+   if("${PYTHON_STD_PACKAGES_PATH}" MATCHES "site-packages")
+        set(_PYTHON_PACKAGES_PATH "python${PYTHON_VERSION_MAJOR_MINOR}/site-packages")
+   endif()
+   # NOTE(review): _PYTHON_PACKAGES_PATH is not read anywhere in this function; only the raw interpreter output is exported.
+    SET(PYTHON_PACKAGES_PATH "${PYTHON_STD_PACKAGES_PATH}" PARENT_SCOPE)
+   # PARENT_SCOPE is needed because variables set inside function() are otherwise local.
+endfunction()
+
+
diff --git a/build/run.sh b/build/run.sh
new file mode 100644
index 0000000..a8e5555
--- /dev/null
+++ b/build/run.sh
@@ -0,0 +1,19 @@
+#!/bin/bash  
+echo "Building CCPi-regularisation Toolkit using CMake"  
+# Optional clean start: rm -r build
+# Requires Cython, install it first: 
+# pip install cython
+# The build/ directory must already exist (mkdir build); the cd below is not checked for failure.
+cd build/
+make clean
+# Configure: Python wrapper only, no Matlab wrapper, no CUDA; install prefix is ./install
+cmake ../ -DBUILD_PYTHON_WRAPPER=ON -DBUILD_MATLAB_WRAPPER=OFF -DBUILD_CUDA=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install
+# Alternative configure line with CUDA enabled:
+# cmake ../ -DBUILD_PYTHON_WRAPPER=ON -DBUILD_MATLAB_WRAPPER=OFF -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install
+make install
+# cp install/lib/libcilreg.so install/python/ccpi/filters
+cd install/python
+export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../lib
+# spyder   (NOTE(review): the export above is relative to install/python and only outlives this script if it is sourced)
+# one can also run Matlab in Linux as:
+# PATH="/path/to/mex/:$PATH" LD_LIBRARY_PATH="/path/to/library:$LD_LIBRARY_PATH" matlab
diff --git a/docs/data/SinoInpaint.mat b/docs/data/SinoInpaint.mat
new file mode 100644
index 0000000..d748fb4
Binary files /dev/null and b/docs/data/SinoInpaint.mat differ
diff --git a/docs/data/lena_gray_512.tif b/docs/data/lena_gray_512.tif
new file mode 100644
index 0000000..f80cafc
Binary files /dev/null and b/docs/data/lena_gray_512.tif differ
diff --git a/docs/demos/demoMatlab_3Ddenoise.m b/docs/demos/demoMatlab_3Ddenoise.m
new file mode 100644
index 0000000..0c331a4
--- /dev/null
+++ b/docs/demos/demoMatlab_3Ddenoise.m
@@ -0,0 +1,178 @@
+% Volume (3D) denoising demo using CCPi-RGL
+clear; close all
+Path1 = fullfile('..', '..', 'src', 'Matlab', 'mex_compile', 'installed'); % MEX binaries are expected here
+Path2 = fullfile('..', 'data'); % test image (docs/data)
+Path3 = fullfile('..', '..', 'src', 'Matlab', 'supp'); % RMSE.m helper
+addpath(Path1);
+addpath(Path2);
+addpath(Path3);
+% the paths above are relative to docs/demos, so run the demo from that folder
+N = 512; 
+slices = 7;
+vol3D = zeros(N,N,slices, 'single');
+Ideal3D = zeros(N,N,slices, 'single');
+Im = double(imread('lena_gray_512.tif'))/255;  % loading image
+for i = 1:slices
+vol3D(:,:,i) = Im + .05*randn(size(Im)); 
+Ideal3D(:,:,i) = Im;
+end
+vol3D(vol3D < 0) = 0;
+figure; imshow(vol3D(:,:,slices), [0 1]); title('Noisy image');
+% (the displayed slice index must not exceed 'slices'; the volume has only 7 slices)
+
+lambda_reg = 0.03; % regularisation parameter for all methods
+%% ROF-TV, CPU
+fprintf('Denoise a volume using the ROF-TV model (CPU) \n');
+tau_rof = 0.0025; % time-marching constant 
+iter_rof = 300; % number of ROF iterations
+tic; u_rof = ROF_TV(single(vol3D), lambda_reg, iter_rof, tau_rof); toc; 
+energyfunc_val_rof = TV_energy(single(u_rof),single(vol3D),lambda_reg, 1);  % get energy function value
+rmse_rof = (RMSE(Ideal3D(:),u_rof(:)));
+fprintf('%s %f \n', 'RMSE error for ROF is:', rmse_rof);
+figure; imshow(u_rof(:,:,7), [0 1]); title('ROF-TV denoised volume (CPU)');
+%% ROF-TV, GPU (disabled: uncomment to run)
+% fprintf('Denoise a volume using the ROF-TV model (GPU) \n');
+% tau_rof = 0.0025; % time-marching constant 
+% iter_rof = 300; % number of ROF iterations
+% tic; u_rofG = ROF_TV_GPU(single(vol3D), lambda_reg, iter_rof, tau_rof); toc;
+% rmse_rofG = (RMSE(Ideal3D(:),u_rofG(:)));
+% fprintf('%s %f \n', 'RMSE error for ROF is:', rmse_rofG);
+% figure; imshow(u_rofG(:,:,7), [0 1]); title('ROF-TV denoised volume (GPU)');
+%% FGP-TV, CPU
+fprintf('Denoise a volume using the FGP-TV model (CPU) \n');
+iter_fgp = 300; % number of FGP iterations
+epsil_tol =  1.0e-05; % tolerance
+tic; u_fgp = FGP_TV(single(vol3D), lambda_reg, iter_fgp, epsil_tol); toc; 
+energyfunc_val_fgp = TV_energy(single(u_fgp),single(vol3D),lambda_reg, 1); % get energy function value
+rmse_fgp = (RMSE(Ideal3D(:),u_fgp(:)));
+fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmse_fgp);
+figure; imshow(u_fgp(:,:,7), [0 1]); title('FGP-TV denoised volume (CPU)');
+%% FGP-TV, GPU (disabled: uncomment to run)
+% fprintf('Denoise a volume using the FGP-TV model (GPU) \n');
+% iter_fgp = 300; % number of FGP iterations
+% epsil_tol =  1.0e-05; % tolerance
+% tic; u_fgpG = FGP_TV_GPU(single(vol3D), lambda_reg, iter_fgp, epsil_tol); toc; 
+% rmse_fgpG = (RMSE(Ideal3D(:),u_fgpG(:)));
+% fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmse_fgpG);
+% figure; imshow(u_fgpG(:,:,7), [0 1]); title('FGP-TV denoised volume (GPU)');
+%% SB-TV, CPU
+fprintf('Denoise a volume using the SB-TV model (CPU) \n');
+iter_sb = 150; % number of SB iterations
+epsil_tol =  1.0e-05; % tolerance
+tic; u_sb = SB_TV(single(vol3D), lambda_reg, iter_sb, epsil_tol); toc; 
+energyfunc_val_sb = TV_energy(single(u_sb),single(vol3D),lambda_reg, 1);  % get energy function value
+rmse_sb = (RMSE(Ideal3D(:),u_sb(:)));
+fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmse_sb);
+figure; imshow(u_sb(:,:,7), [0 1]); title('SB-TV denoised volume (CPU)');
+%% SB-TV, GPU (disabled: uncomment to run)
+% fprintf('Denoise a volume using the SB-TV model (GPU) \n');
+% iter_sb = 150; % number of SB iterations
+% epsil_tol =  1.0e-05; % tolerance
+% tic; u_sbG = SB_TV_GPU(single(vol3D), lambda_reg, iter_sb, epsil_tol); toc; 
+% rmse_sbG = (RMSE(Ideal3D(:),u_sbG(:)));
+% fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmse_sbG);
+% figure; imshow(u_sbG(:,:,7), [0 1]); title('SB-TV denoised volume (GPU)');
+%% ROF-LLT, CPU
+fprintf('Denoise a volume using the ROF-LLT model (CPU) \n');
+lambda_ROF = lambda_reg; % ROF regularisation parameter
+lambda_LLT = lambda_reg*0.35; % LLT regularisation parameter
+iter_LLT = 300; % iterations 
+tau_rof_llt = 0.0025; % time-marching constant 
+tic; u_rof_llt = LLT_ROF(single(vol3D), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
+rmse_rof_llt = (RMSE(Ideal3D(:),u_rof_llt(:)));
+fprintf('%s %f \n', 'RMSE error for ROF-LLT is:', rmse_rof_llt);
+figure; imshow(u_rof_llt(:,:,7), [0 1]); title('ROF-LLT denoised volume (CPU)');
+%% ROF-LLT, GPU (disabled: uncomment to run)
+% fprintf('Denoise a volume using the ROF-LLT model (GPU) \n');
+% lambda_ROF = lambda_reg; % ROF regularisation parameter
+% lambda_LLT = lambda_reg*0.35; % LLT regularisation parameter
+% iter_LLT = 300; % iterations 
+% tau_rof_llt = 0.0025; % time-marching constant 
+% tic; u_rof_llt_g = LLT_ROF_GPU(single(vol3D), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
+% rmse_rof_llt = (RMSE(Ideal3D(:),u_rof_llt_g(:)));
+% fprintf('%s %f \n', 'RMSE error for ROF-LLT is:', rmse_rof_llt);
+% figure; imshow(u_rof_llt_g(:,:,7), [0 1]); title('ROF-LLT denoised volume (GPU)');
+%% Nonlinear diffusion, CPU
+fprintf('Denoise a volume using Nonlinear-Diffusion model (CPU) \n');
+iter_diff = 300; % number of diffusion iterations
+lambda_regDiff = 0.025; % regularisation for the diffusivity 
+sigmaPar = 0.015; % edge-preserving parameter
+tau_param = 0.025; % time-marching constant 
+tic; u_diff = NonlDiff(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
+rmse_diff = (RMSE(Ideal3D(:),u_diff(:)));
+fprintf('%s %f \n', 'RMSE error for Diffusion is:', rmse_diff);
+figure; imshow(u_diff(:,:,7), [0 1]); title('Diffusion denoised volume (CPU)');
+%% Nonlinear diffusion, GPU (disabled: uncomment to run)
+% fprintf('Denoise a volume using Nonlinear-Diffusion model (GPU) \n');
+% iter_diff = 300; % number of diffusion iterations
+% lambda_regDiff = 0.025; % regularisation for the diffusivity 
+% sigmaPar = 0.015; % edge-preserving parameter
+% tau_param = 0.025; % time-marching constant 
+% tic; u_diff_g = NonlDiff_GPU(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
+% rmse_diff = (RMSE(Ideal3D(:),u_diff_g(:)));
+% fprintf('%s %f \n', 'RMSE error for Diffusion is:', rmse_diff);
+% figure; imshow(u_diff_g(:,:,7), [0 1]); title('Diffusion denoised volume (GPU)');
+%% Fourth-order anisotropic diffusion, CPU
+fprintf('Denoise using Fourth-order anisotropic diffusion model (CPU) \n');
+iter_diff = 300; % number of diffusion iterations
+lambda_regDiff = 3.5; % regularisation for the diffusivity 
+sigmaPar = 0.02; % edge-preserving parameter
+tau_param = 0.0015; % time-marching constant 
+tic; u_diff4 = Diffusion_4thO(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
+rmse_diff4 = (RMSE(Ideal3D(:),u_diff4(:)));
+fprintf('%s %f \n', 'RMSE error for Anis.Diff of 4th order is:', rmse_diff4);
+figure; imshow(u_diff4(:,:,7), [0 1]); title('Diffusion 4thO denoised volume (CPU)');
+%% Fourth-order anisotropic diffusion, GPU (disabled: uncomment to run)
+% fprintf('Denoise using Fourth-order anisotropic diffusion model (GPU) \n');
+% iter_diff = 300; % number of diffusion iterations
+% lambda_regDiff = 3.5; % regularisation for the diffusivity 
+% sigmaPar = 0.02; % edge-preserving parameter
+% tau_param = 0.0015; % time-marching constant 
+% tic; u_diff4_g = Diffusion_4thO_GPU(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
+% rmse_diff4 = (RMSE(Ideal3D(:),u_diff4_g(:)));
+% fprintf('%s %f \n', 'RMSE error for Anis.Diff of 4th order is:', rmse_diff4);
+% figure; imshow(u_diff4_g(:,:,7), [0 1]); title('Diffusion 4thO denoised volume (GPU)');
+%% TGV, CPU
+fprintf('Denoise using the TGV model (CPU) \n');
+lambda_TGV = 0.03; % regularisation parameter
+alpha1 = 1.0; % parameter to control the first-order term
+alpha0 = 2.0; % parameter to control the second-order term
+iter_TGV = 500; % number of Primal-Dual iterations for TGV
+tic; u_tgv = TGV(single(vol3D), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
+rmseTGV = RMSE(Ideal3D(:),u_tgv(:));
+fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV);
+figure; imshow(u_tgv(:,:,3), [0 1]); title('TGV denoised volume (CPU)');
+%% FGP-dTV, CPU
+%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< %
+fprintf('Denoise a volume using the FGP-dTV model (CPU) \n');
+
+% create a second (reference) volume with slightly less noise
+vol3D_ref = zeros(N,N,slices, 'single');
+for i = 1:slices
+vol3D_ref(:,:,i) = Im + .01*randn(size(Im)); 
+end
+vol3D_ref(vol3D_ref < 0) = 0;
+% vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
+
+iter_fgp = 300; % number of FGP iterations
+epsil_tol =  1.0e-05; % tolerance
+eta =  0.2; % Reference image gradient smoothing constant
+tic; u_fgp_dtv = FGP_dTV(single(vol3D), single(vol3D_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
+figure; imshow(u_fgp_dtv(:,:,7), [0 1]); title('FGP-dTV denoised volume (CPU)');
+%% FGP-dTV, GPU -- NOTE(review): the only GPU cell left enabled; it calls FGP_dTV_GPU
+fprintf('Denoise a volume using the FGP-dTV model (GPU) \n');
+
+% create a second (reference) volume with slightly less noise
+vol3D_ref = zeros(N,N,slices, 'single');
+for i = 1:slices
+vol3D_ref(:,:,i) = Im + .01*randn(size(Im)); 
+end
+vol3D_ref(vol3D_ref < 0) = 0;
+% vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
+
+iter_fgp = 300; % number of FGP iterations
+epsil_tol =  1.0e-05; % tolerance
+eta =  0.2; % Reference image gradient smoothing constant
+tic; u_fgp_dtv_g = FGP_dTV_GPU(single(vol3D), single(vol3D_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
+figure; imshow(u_fgp_dtv_g(:,:,7), [0 1]); title('FGP-dTV denoised volume (GPU)');
+%%
diff --git a/docs/demos/demoMatlab_denoise.m b/docs/demos/demoMatlab_denoise.m
new file mode 100644
index 0000000..14d3096
--- /dev/null
+++ b/docs/demos/demoMatlab_denoise.m
@@ -0,0 +1,189 @@
+% Image (2D) denoising demo using CCPi-RGL
+clear; close all
+fsep = '/';
+
+Path1 = sprintf(['..' fsep 'mex_compile' fsep 'installed'], 1i);
+Path2 = sprintf(['..' fsep '..' fsep '..' fsep 'data' fsep], 1i);
+Path3 = sprintf(['..' fsep 'supp'], 1i);
+addpath(Path1); addpath(Path2); addpath(Path3);
+
+Im = double(imread('lena_gray_512.tif'))/255;  % loading image
+u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
+figure; imshow(u0, [0 1]); title('Noisy image');
+
+lambda_reg = 0.03; % regularisation parameter for all methods
+%%
+fprintf('Denoise using the ROF-TV model (CPU) \n');
+tau_rof = 0.0025; % time-marching constant 
+iter_rof = 750; % number of ROF iterations
+tic; u_rof = ROF_TV(single(u0), lambda_reg, iter_rof, tau_rof); toc; 
+energyfunc_val_rof = TV_energy(single(u_rof),single(u0),lambda_reg, 1);  % get energy function value
+rmseROF = (RMSE(u_rof(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for ROF-TV is:', rmseROF);
+figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)');
+%%
+% fprintf('Denoise using the ROF-TV model (GPU) \n');
+% tau_rof = 0.0025; % time-marching constant 
+% iter_rof = 750; % number of ROF iterations
+% tic; u_rofG = ROF_TV_GPU(single(u0), lambda_reg, iter_rof, tau_rof); toc;
+% figure; imshow(u_rofG, [0 1]); title('ROF-TV denoised image (GPU)');
+%%
+fprintf('Denoise using the FGP-TV model (CPU) \n');
+iter_fgp = 1000; % number of FGP iterations
+epsil_tol =  1.0e-06; % tolerance
+tic; u_fgp = FGP_TV(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; 
+energyfunc_val_fgp = TV_energy(single(u_fgp),single(u0),lambda_reg, 1); % get energy function value
+rmseFGP = (RMSE(u_fgp(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmseFGP);
+figure; imshow(u_fgp, [0 1]); title('FGP-TV denoised image (CPU)');
+
+%%
+% fprintf('Denoise using the FGP-TV model (GPU) \n');
+% iter_fgp = 1000; % number of FGP iterations
+% epsil_tol =  1.0e-05; % tolerance
+% tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; 
+% figure; imshow(u_fgpG, [0 1]); title('FGP-TV denoised image (GPU)');
+%%
+fprintf('Denoise using the SB-TV model (CPU) \n');
+iter_sb = 150; % number of SB iterations
+epsil_tol =  1.0e-06; % tolerance
+tic; u_sb = SB_TV(single(u0), lambda_reg, iter_sb, epsil_tol); toc; 
+energyfunc_val_sb = TV_energy(single(u_sb),single(u0),lambda_reg, 1);  % get energy function value
+rmseSB = (RMSE(u_sb(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmseSB);
+figure; imshow(u_sb, [0 1]); title('SB-TV denoised image (CPU)');
+%%
+% fprintf('Denoise using the SB-TV model (GPU) \n');
+% iter_sb = 150; % number of SB iterations
+% epsil_tol =  1.0e-06; % tolerance
+% tic; u_sbG = SB_TV_GPU(single(u0), lambda_reg, iter_sb, epsil_tol); toc; 
+% figure; imshow(u_sbG, [0 1]); title('SB-TV denoised image (GPU)');
+%%
+fprintf('Denoise using the TGV model (CPU) \n');
+lambda_TGV = 0.045; % regularisation parameter
+alpha1 = 1.0; % parameter to control the first-order term
+alpha0 = 2.0; % parameter to control the second-order term
+iter_TGV = 2000; % number of Primal-Dual iterations for TGV
+tic; u_tgv = TGV(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
+rmseTGV = (RMSE(u_tgv(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV);
+figure; imshow(u_tgv, [0 1]); title('TGV denoised image (CPU)');
+%%
+% fprintf('Denoise using the TGV model (GPU) \n');
+% lambda_TGV = 0.045; % regularisation parameter
+% alpha1 = 1.0; % parameter to control the first-order term
+% alpha0 = 2.0; % parameter to control the second-order term
+% iter_TGV = 2000; % number of Primal-Dual iterations for TGV
+% tic; u_tgv_gpu = TGV_GPU(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
+% rmseTGV_gpu = (RMSE(u_tgv_gpu(:),Im(:)));
+% fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV_gpu);
+% figure; imshow(u_tgv_gpu, [0 1]); title('TGV denoised image (GPU)');
+%%
+fprintf('Denoise using the ROF-LLT model (CPU) \n');
+lambda_ROF = lambda_reg; % ROF regularisation parameter
+lambda_LLT = lambda_reg*0.45; % LLT regularisation parameter
+iter_LLT = 1; % iterations (NOTE(review): the commented-out GPU variant uses 500 -- confirm)
+tau_rof_llt = 0.0025; % time-marching constant
+tic; u_rof_llt = LLT_ROF(single(u0), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc;
+rmseROFLLT = (RMSE(u_rof_llt(:),Im(:))); % error with respect to the noise-free image Im
+fprintf('%s %f \n', 'RMSE error for ROF-LLT is:', rmseROFLLT);
+figure; imshow(u_rof_llt, [0 1]); title('ROF-LLT denoised image (CPU)');
+%%
+% fprintf('Denoise using the ROF-LLT model (GPU) \n');
+% lambda_ROF = lambda_reg; % ROF regularisation parameter
+% lambda_LLT = lambda_reg*0.45; % LLT regularisation parameter
+% iter_LLT = 500; % iterations 
+% tau_rof_llt = 0.0025; % time-marching constant 
+% tic; u_rof_llt_g = LLT_ROF_GPU(single(u0), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
+% rmseROFLLT_g = (RMSE(u_rof_llt_g(:),Im(:)));
+% fprintf('%s %f \n', 'RMSE error for ROF-LLT is:', rmseROFLLT_g);
+% figure; imshow(u_rof_llt_g, [0 1]); title('ROF-LLT denoised image (GPU)');
+%%
+fprintf('Denoise using Nonlinear-Diffusion model (CPU) \n');
+iter_diff = 800; % number of diffusion iterations
+lambda_regDiff = 0.025; % regularisation for the diffusivity 
+sigmaPar = 0.015; % edge-preserving parameter
+tau_param = 0.025; % time-marching constant 
+tic; u_diff = NonlDiff(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
+rmseDiffus = (RMSE(u_diff(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for Nonlinear Diffusion is:', rmseDiffus);
+figure; imshow(u_diff, [0 1]); title('Diffusion denoised image (CPU)');
+%%
+% fprintf('Denoise using Nonlinear-Diffusion model (GPU) \n');
+% iter_diff = 800; % number of diffusion iterations
+% lambda_regDiff = 0.025; % regularisation for the diffusivity 
+% sigmaPar = 0.015; % edge-preserving parameter
+% tau_param = 0.025; % time-marching constant 
+% tic; u_diff_g = NonlDiff_GPU(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
+% figure; imshow(u_diff_g, [0 1]); title('Diffusion denoised image (GPU)');
+%%
+fprintf('Denoise using Fourth-order anisotropic diffusion model (CPU) \n');
+iter_diff = 800; % number of diffusion iterations
+lambda_regDiff = 3.5; % regularisation for the diffusivity 
+sigmaPar = 0.02; % edge-preserving parameter
+tau_param = 0.0015; % time-marching constant 
+tic; u_diff4 = Diffusion_4thO(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
+rmseDiffHO = (RMSE(u_diff4(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for Fourth-order anisotropic diffusion is:', rmseDiffHO);
+figure; imshow(u_diff4, [0 1]); title('Diffusion 4thO denoised image (CPU)');
+%%
+% fprintf('Denoise using Fourth-order anisotropic diffusion model (GPU) \n');
+% iter_diff = 800; % number of diffusion iterations
+% lambda_regDiff = 3.5; % regularisation for the diffusivity 
+% sigmaPar = 0.02; % edge-preserving parameter
+% tau_param = 0.0015; % time-marching constant 
+% tic; u_diff4_g = Diffusion_4thO_GPU(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
+% figure; imshow(u_diff4_g, [0 1]); title('Diffusion 4thO denoised image (GPU)');
+%%
+fprintf('Weights pre-calculation for Non-local TV (takes time on CPU) \n');
+SearchingWindow = 7;
+PatchWindow = 2;
+NeighboursNumber = 20; % the number of neighbours to include
+h = 0.23; % edge related parameter for NLM
+tic; [H_i, H_j, Weights] = PatchSelect(single(u0), SearchingWindow, PatchWindow, NeighboursNumber, h); toc;
+%%
+fprintf('Denoise using Non-local Total Variation (CPU) \n');
+iter_nltv = 3; % number of nltv iterations
+lambda_nltv = 0.05; % regularisation parameter for nltv
+tic; u_nltv = Nonlocal_TV(single(u0), H_i, H_j, 0, Weights, lambda_nltv, iter_nltv); toc; 
+rmse_nltv = (RMSE(u_nltv(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for Non-local Total Variation is:', rmse_nltv);
+figure; imagesc(u_nltv, [0 1]); colormap(gray); daspect([1 1 1]); title('Non-local Total Variation denoised image (CPU)');
+%%
+%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< %
+
+fprintf('Denoise using the FGP-dTV model (CPU) \n');
+% create another image (reference) with a slightly lower amount of noise
+u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0;
+% u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
+
+iter_fgp = 1000; % number of FGP iterations
+epsil_tol =  1.0e-06; % tolerance
+eta =  0.2; % Reference image gradient smoothing constant
+tic; u_fgp_dtv = FGP_dTV(single(u0), single(u_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
+rmse_dTV= (RMSE(u_fgp_dtv(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for Directional Total Variation (dTV) is:', rmse_dTV);
+figure; imshow(u_fgp_dtv, [0 1]); title('FGP-dTV denoised image (CPU)');
+%%
+% fprintf('Denoise using the FGP-dTV model (GPU) \n');
+% % create another image (reference) with slightly less amount of noise
+% u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0;
+% % u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
+% 
+% iter_fgp = 1000; % number of FGP iterations
+% epsil_tol =  1.0e-06; % tolerance
+% eta =  0.2; % Reference image gradient smoothing constant
+% tic; u_fgp_dtvG = FGP_dTV_GPU(single(u0), single(u_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
+% figure; imshow(u_fgp_dtvG, [0 1]); title('FGP-dTV denoised image (GPU)');
+%%
+fprintf('Denoise using the TNV prior (CPU) \n');
+slices = 5; [N, M] = size(Im); % number of channels; in-plane size follows the loaded image
+vol3D = zeros(N,M,slices, 'single');
+for i = 1:slices
+vol3D(:,:,i) = Im + .05*randn(size(Im)); % each channel: same image, independent noise
+end
+vol3D(vol3D < 0) = 0; % clip negative values produced by the noise
+
+iter_tnv = 200; % number of TNV iterations
+tic; u_tnv = TNV(single(vol3D), lambda_reg, iter_tnv); toc;
+figure; imshow(u_tnv(:,:,3), [0 1]); title('TNV denoised stack of channels (CPU)');
diff --git a/docs/demos/demoMatlab_inpaint.m b/docs/demos/demoMatlab_inpaint.m
new file mode 100644
index 0000000..66f9c15
--- /dev/null
+++ b/docs/demos/demoMatlab_inpaint.m
@@ -0,0 +1,35 @@
+% Image (2D) inpainting demo using CCPi-RGL
+clear; close all
+Path1 = fullfile('..', 'mex_compile', 'installed'); % fullfile: no escape parsing of '\' on Windows
+Path2 = fullfile('..', '..', '..', 'data'); % data folder, relative to this demo
+addpath(Path1);
+addpath(Path2);
+
+load('SinoInpaint.mat');
+Sinogram = Sinogram./max(Sinogram(:));
+Sino_mask = Sinogram.*(1-single(Mask));
+figure; 
+subplot(1,2,1); imshow(Sino_mask, [0 1]); title('Missing data sinogram');
+subplot(1,2,2); imshow(Mask, [0 1]); title('Mask');
+%%
+fprintf('Inpaint using Linear-Diffusion model (CPU) \n');
+iter_diff = 5000; % number of diffusion iterations
+lambda_regDiff = 6000; % regularisation for the diffusivity 
+sigmaPar = 0.0; % edge-preserving parameter
+tau_param = 0.000075; % time-marching constant 
+tic; u_diff = NonlDiff_Inp(single(Sino_mask), Mask, lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
+figure; imshow(u_diff, [0 1]); title('Linear-Diffusion inpainted sinogram (CPU)');
+%%
+fprintf('Inpaint using Nonlinear-Diffusion model (CPU) \n');
+iter_diff = 1500; % number of diffusion iterations
+lambda_regDiff = 80; % regularisation for the diffusivity 
+sigmaPar = 0.00009; % edge-preserving parameter
+tau_param = 0.000008; % time-marching constant 
+tic; u_diff = NonlDiff_Inp(single(Sino_mask), Mask, lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
+figure; imshow(u_diff, [0 1]); title('Non-Linear Diffusion inpainted sinogram (CPU)');
+%%
+fprintf('Inpaint using Nonlocal Vertical Marching model (CPU) \n');
+Increment = 1; % linear increment for the searching window
+tic; [u_nom,maskupd] = NonlocalMarching_Inpaint(single(Sino_mask), Mask, Increment); toc;
+figure; imshow(u_nom, [0 1]); title('NVM inpainted sinogram (CPU)');
+%%
\ No newline at end of file
diff --git a/docs/demos/demo_cpu_inpainters.py b/docs/demos/demo_cpu_inpainters.py
new file mode 100644
index 0000000..3b4191b
--- /dev/null
+++ b/docs/demos/demo_cpu_inpainters.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Demonstration of CPU inpainters
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import timeit
+from scipy import io
+from ccpi.filters.regularisers import NDF_INP, NVM_INP
+from qualitymetrics import rmse
+###############################################################################
+def printParametersToString(pars):
+        """Render ``pars`` as text with one 'key = value' line per entry.
+
+        'algorithm' is shown by its ``__name__`` and 'input'/'maskData' by
+        their ``np.shape``; all other values are formatted unchanged."""
+        rows = []
+        for name, entry in pars.items():
+            if name == 'algorithm':
+                entry = entry.__name__
+            elif name in ('input', 'maskData'):
+                entry = np.shape(entry)
+            rows.append("{0} = {1}\n".format(name, entry))
+        return ''.join(rows)
+###############################################################################
+
+# read sinogram and the mask
+filename = os.path.join(".." , ".." , ".." , "data" ,"SinoInpaint.mat")
+sino = io.loadmat(filename)
+sino_full = sino.get('Sinogram')
+Mask = sino.get('Mask')
+[angles_dim,detectors_dim] = sino_full.shape
+sino_full = sino_full/np.max(sino_full)
+#apply mask to sinogram
+sino_cut = sino_full*(1-Mask)
+#sino_cut_new = np.zeros((angles_dim,detectors_dim),'float32')
+#sino_cut_new = sino_cut.copy(order='c')
+#sino_cut_new[:] = sino_cut[:]
+sino_cut_new = np.ascontiguousarray(sino_cut, dtype=np.float32);
+#mask = np.zeros((angles_dim,detectors_dim),'uint8')
+#mask =Mask.copy(order='c')
+#mask[:] = Mask[:]
+mask = np.ascontiguousarray(Mask, dtype=np.uint8);
+
+plt.figure(1)
+plt.subplot(121)
+plt.imshow(sino_cut_new,vmin=0.0, vmax=1)
+plt.title('Missing Data sinogram')
+plt.subplot(122)
+plt.imshow(mask)
+plt.title('Mask')
+plt.show()
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Inpainting using linear diffusion (2D)__")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(2)
+plt.suptitle('Performance of linear inpainting using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Missing data sinogram')
+imgplot = plt.imshow(sino_cut_new,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF_INP, \
+        'input' : sino_cut_new,\
+        'maskData' : mask,\
+        'regularisation_parameter':5000,\
+        'edge_parameter':0,\
+        'number_of_iterations' :5000 ,\
+        'time_marching_parameter':0.000075,\
+        'penalty_type':0
+        }
+        
+start_time = timeit.default_timer()
+ndf_inp_linear = NDF_INP(pars['input'],
+              pars['maskData'],
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'])
+             
+rms = rmse(sino_full, ndf_inp_linear)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_inp_linear, cmap="gray")
+plt.title('{}'.format('Linear diffusion inpainting results'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_Inpainting using nonlinear diffusion (2D)_")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(3)
+plt.suptitle('Performance of nonlinear diffusion inpainting using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Missing data sinogram')
+imgplot = plt.imshow(sino_cut_new,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF_INP, \
+        'input' : sino_cut_new,\
+        'maskData' : mask,\
+        'regularisation_parameter':80,\
+        'edge_parameter':0.00009,\
+        'number_of_iterations' :1500 ,\
+        'time_marching_parameter':0.000008,\
+        'penalty_type':1
+        }
+        
+start_time = timeit.default_timer()
+ndf_inp_nonlinear = NDF_INP(pars['input'],
+              pars['maskData'],
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'])
+             
+rms = rmse(sino_full, ndf_inp_nonlinear)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_inp_nonlinear, cmap="gray")
+plt.title('{}'.format('Nonlinear diffusion inpainting results'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("Inpainting using nonlocal vertical marching")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(4)
+plt.suptitle('Performance of NVM inpainting using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Missing data sinogram')
+imgplot = plt.imshow(sino_cut,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NVM_INP, \
+        'input' : sino_cut_new,\
+        'maskData' : mask,\
+        'SW_increment': 1,\
+        'number_of_iterations' : 150
+        }
+        
+start_time = timeit.default_timer()
+(nvm_inp, mask_upd) = NVM_INP(pars['input'],
+              pars['maskData'],
+              pars['SW_increment'],
+              pars['number_of_iterations'])
+             
+rms = rmse(sino_full, nvm_inp)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(nvm_inp, cmap="gray")
+plt.title('{}'.format('Nonlocal Vertical Marching inpainting results'))
+#%%
diff --git a/docs/demos/demo_cpu_regularisers.py b/docs/demos/demo_cpu_regularisers.py
new file mode 100644
index 0000000..e6befa9
--- /dev/null
+++ b/docs/demos/demo_cpu_regularisers.py
@@ -0,0 +1,572 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 22 11:39:43 2018
+
+Demonstration of CPU regularisers 
+
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import timeit
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, TNV, NDF, Diff4th
+from ccpi.filters.regularisers import PatchSelect, NLTV
+from qualitymetrics import rmse
+###############################################################################
+def printParametersToString(pars):
+        """Render ``pars`` as text with one 'key = value' line per entry.
+
+        'algorithm' is shown by its ``__name__`` and 'input'/'refdata' by
+        their ``np.shape``; all other values are formatted unchanged."""
+        rows = []
+        for name, entry in pars.items():
+            if name == 'algorithm':
+                entry = entry.__name__
+            elif name in ('input', 'refdata'):
+                entry = np.shape(entry)
+            rows.append("{0} = {1}\n".format(name, entry))
+        return ''.join(rows)
+###############################################################################
+#%%
+filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
+
+# read image
+Im = plt.imread(filename)
+Im = np.asarray(Im, dtype='float32')
+
+Im = Im/255.0
+perc = 0.05
+u0 = Im + np.random.normal(loc = 0 ,
+                                  scale = perc * Im , 
+                                  size = np.shape(Im))
+u_ref = Im + np.random.normal(loc = 0 ,
+                                  scale = 0.01 * Im , 
+                                  size = np.shape(Im))
+(N,M) = np.shape(u0)
+# clipping of negative values (u0 -> max(u0, 0)) is currently disabled:
+# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+u0 = u0.astype('float32')
+u_ref = u_ref.astype('float32')
+
+# change dims to check that modules work with non-square images
+"""
+M = M-100
+u_ref2 = np.zeros([N,M],dtype='float32')
+u_ref2[:,0:M] = u_ref[:,0:M]
+u_ref = u_ref2
+del u_ref2
+
+u02 = np.zeros([N,M],dtype='float32')
+u02[:,0:M] = u0[:,0:M]
+u0 = u02
+del u02
+
+Im2 = np.zeros([N,M],dtype='float32')
+Im2[:,0:M] = Im[:,0:M]
+Im = Im2
+del Im2
+"""
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________ROF-TV (2D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of ROF-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm': ROF_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04,\
+        'number_of_iterations': 1200,\
+        'time_marching_parameter': 0.0025        
+        }
+print ("#############ROF TV CPU####################")
+start_time = timeit.default_timer()
+rof_cpu = ROF_TV(pars['input'],
+             pars['regularisation_parameter'],
+             pars['number_of_iterations'],
+             pars['time_marching_parameter'],'cpu')
+rms = rmse(Im, rof_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________FGP-TV (2D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of FGP-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :2000 ,\
+        'tolerance_constant':1e-06,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP TV CPU####################")
+start_time = timeit.default_timer()
+fgp_cpu = FGP_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')  
+             
+             
+rms = rmse(Im, fgp_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________SB-TV (2D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of SB-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :150 ,\
+        'tolerance_constant':1e-06,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############SB TV CPU####################")
+start_time = timeit.default_timer()
+sb_cpu = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'cpu')  
+             
+             
+rms = rmse(Im, sb_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+#%%
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_____Total Generalised Variation (2D)______")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of TGV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'alpha1':1.0,\
+        'alpha0':2.0,\
+        'number_of_iterations' :1350 ,\
+        'LipshitzConstant' :12 ,\
+        }
+        
+print ("#############TGV CPU####################")
+start_time = timeit.default_timer()
+tgv_cpu = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'cpu')
+             
+             
+rms = rmse(Im, tgv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("______________LLT- ROF (2D)________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of LLT-ROF regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : LLT_ROF, \
+        'input' : u0,\
+        'regularisation_parameterROF':0.04, \
+        'regularisation_parameterLLT':0.01, \
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter' :0.0025 ,\
+        }
+        
+print ("#############LLT- ROF CPU####################")
+start_time = timeit.default_timer()
+lltrof_cpu = LLT_ROF(pars['input'], 
+              pars['regularisation_parameterROF'],
+              pars['regularisation_parameterLLT'],
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'cpu')
+
+rms = rmse(Im, lltrof_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(lltrof_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("________________NDF (2D)___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of NDF regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF, \
+        'input' : u0,\
+        'regularisation_parameter':0.025, \
+        'edge_parameter':0.015,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.025,\
+        'penalty_type':1
+        }
+        
+print ("#############NDF CPU################")
+start_time = timeit.default_timer()
+ndf_cpu = NDF(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'],'cpu')  
+             
+rms = rmse(Im, ndf_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Anisotropic Diffusion 4th Order (2D)____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of Diff4th regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : Diff4th, \
+        'input' : u0,\
+        'regularisation_parameter':3.5, \
+        'edge_parameter':0.02,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.0015
+        }
+        
+print ("#############Diff4th CPU################")
+start_time = timeit.default_timer()
+diff4_cpu = Diff4th(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'cpu')
+             
+rms = rmse(Im, diff4_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(diff4_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Nonlocal patches pre-calculation____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+start_time = timeit.default_timer()
+# set parameters
+pars = {'algorithm' : PatchSelect, \
+        'input' : u0,\
+        'searchwindow': 7, \
+        'patchwindow': 2,\
+        'neighbours' : 15 ,\
+        'edge_parameter':0.18}
+
+H_i, H_j, Weights = PatchSelect(pars['input'], 
+              pars['searchwindow'],
+              pars['patchwindow'], 
+              pars['neighbours'],
+              pars['edge_parameter'],'cpu')
+              
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+"""
+plt.figure()
+plt.imshow(Weights[0,:,:],cmap="gray",interpolation="nearest",vmin=0, vmax=1)
+plt.show()
+"""
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Nonlocal Total Variation penalty____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of NLTV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+pars2 = {'algorithm' : NLTV, \
+        'input' : u0,\
+        'H_i': H_i, \
+        'H_j': H_j,\
+        'H_k' : 0,\
+        'Weights' : Weights,\
+        'regularisation_parameter': 0.04,\
+        'iterations': 3
+        }
+start_time = timeit.default_timer()
+nltv_cpu = NLTV(pars2['input'], 
+              pars2['H_i'],
+              pars2['H_j'], 
+              pars2['H_k'],
+              pars2['Weights'],
+              pars2['regularisation_parameter'],
+              pars2['iterations'])
+
+rms = rmse(Im, nltv_cpu)
+pars2['rmse'] = rms
+# report the NLTV settings (pars2); the large H_i/H_j/Weights arrays are left out of the text box
+txtstr = printParametersToString({k: v for k, v in pars2.items() if k not in ('H_i', 'H_j', 'Weights')})
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(nltv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_____________FGP-dTV (2D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of FGP-dTV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV, \
+        'input' : u0,\
+        'refdata' : u_ref,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :2000 ,\
+        'tolerance_constant':1e-06,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP dTV CPU####################")
+start_time = timeit.default_timer()
+fgp_dtv_cpu = FGP_dTV(pars['input'], 
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')
+             
+rms = rmse(Im, fgp_dtv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dtv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("__________Total nuclear Variation__________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of TNV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+channelsNo = 5
+noisyVol = np.zeros((channelsNo,N,M),dtype='float32')
+idealVol = np.zeros((channelsNo,N,M),dtype='float32')
+
+for i in range (channelsNo):
+    noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
+    idealVol[i,:,:] = Im
+
+# set parameters
+pars = {'algorithm' : TNV, \
+        'input' : noisyVol,\
+        'regularisation_parameter': 0.04, \
+        'number_of_iterations' : 200 ,\
+        'tolerance_constant':1e-05
+        }
+        
+print ("#############TNV CPU#################")
+start_time = timeit.default_timer()
+tnv_cpu = TNV(pars['input'],           
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'])
+             
+rms = rmse(idealVol, tnv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tnv_cpu[3,:,:], cmap="gray")
+plt.title('{}'.format('CPU results'))
diff --git a/docs/demos/demo_cpu_regularisers3D.py b/docs/demos/demo_cpu_regularisers3D.py
new file mode 100644
index 0000000..2d2fc22
--- /dev/null
+++ b/docs/demos/demo_cpu_regularisers3D.py
@@ -0,0 +1,458 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 22 11:39:43 2018
+
+Demonstration of 3D CPU regularisers 
+
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import timeit
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
+from qualitymetrics import rmse
+###############################################################################
+def printParametersToString(pars):
+    """Return one 'key = value' line per entry of the parameter dictionary.
+
+    The 'algorithm' entry is shown by its __name__, while the 'input' and
+    'refdata' entries are shown by their shapes instead of their contents.
+    """
+    def show(key, value):
+        if key == 'algorithm':
+            return value.__name__
+        if key in ('input', 'refdata'):
+            return np.shape(value)
+        return value
+    return ''.join("{0} = {1}\n".format(k, show(k, v)) for k, v in pars.items())
+###############################################################################
+#%%
+filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
+
+# read image
+Im = plt.imread(filename)
+Im = np.asarray(Im, dtype='float32')
+
+Im = Im/255
+perc = 0.05
+u0 = Im + np.random.normal(loc = 0 ,
+                                  scale = perc * Im , 
+                                  size = np.shape(Im))
+u_ref = Im + np.random.normal(loc = 0 ,
+                                  scale = 0.01 * Im , 
+                                  size = np.shape(Im))
+(N,M) = np.shape(u0)
+# (disabled) clamp negative values of u0 to zero, i.e. u0 -> max(u0, 0)
+# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+u0 = u0.astype('float32')
+u_ref = u_ref.astype('float32')
+
+# change dims to check that the modules work with non-square images
+"""
+M = M-100
+u_ref2 = np.zeros([N,M],dtype='float32')
+u_ref2[:,0:M] = u_ref[:,0:M]
+u_ref = u_ref2
+del u_ref2
+
+u02 = np.zeros([N,M],dtype='float32')
+u02[:,0:M] = u0[:,0:M]
+u0 = u02
+del u02
+
+Im2 = np.zeros([N,M],dtype='float32')
+Im2[:,0:M] = Im[:,0:M]
+Im = Im2
+del Im2
+"""
+slices = 15
+
+noisyVol = np.zeros((slices,N,M),dtype='float32')
+noisyRef = np.zeros((slices,N,M),dtype='float32')
+idealVol = np.zeros((slices,N,M),dtype='float32')
+
+for i in range (slices):
+    noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
+    noisyRef[i,:,:] = Im + np.random.normal(loc = 0 , scale = 0.01 * Im , size = np.shape(Im))
+    idealVol[i,:,:] = Im
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________ROF-TV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of ROF-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy 11th slice of a volume')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm': ROF_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04,\
+        'number_of_iterations': 500,\
+        'time_marching_parameter': 0.0025
+        }
+print ("#############ROF TV CPU####################")
+start_time = timeit.default_timer()
+rof_cpu3D = ROF_TV(pars['input'],
+             pars['regularisation_parameter'],
+             pars['number_of_iterations'],
+             pars['time_marching_parameter'],'cpu')
+rms = rmse(idealVol, rof_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using ROF-TV'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________FGP-TV (3D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of FGP-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :300 ,\
+        'tolerance_constant':0.00001,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP TV CPU####################")
+start_time = timeit.default_timer()
+fgp_cpu3D = FGP_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')  
+             
+             
+rms = rmse(idealVol, fgp_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using FGP-TV'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________SB-TV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of SB-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :150 ,\
+        'tolerance_constant':0.00001,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############SB TV CPU####################")
+start_time = timeit.default_timer()
+sb_cpu3D = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'cpu')
+             
+rms = rmse(idealVol, sb_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using SB-TV'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________LLT-ROF (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of LLT-ROF regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : LLT_ROF, \
+        'input' : noisyVol,\
+        'regularisation_parameterROF':0.04, \
+        'regularisation_parameterLLT':0.015, \
+        'number_of_iterations' :300 ,\
+        'time_marching_parameter' :0.0025 ,\
+        }
+
+print ("#############LLT ROF CPU####################")
+start_time = timeit.default_timer()
+lltrof_cpu3D = LLT_ROF(pars['input'], 
+              pars['regularisation_parameterROF'],
+              pars['regularisation_parameterLLT'],
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'cpu')
+
+rms = rmse(idealVol, lltrof_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(lltrof_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using LLT-ROF'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________TGV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of TGV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'alpha1':1.0,\
+        'alpha0':2.0,\
+        'number_of_iterations' :250 ,\
+        'LipshitzConstant' :12 ,\
+        }
+
+print ("#############TGV CPU####################")
+start_time = timeit.default_timer()
+tgv_cpu3D = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'cpu')
+             
+
+rms = rmse(idealVol, tgv_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using TGV'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("________________NDF (3D)___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of NDF regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy volume')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.025, \
+        'edge_parameter':0.015,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.025,\
+        'penalty_type':  1
+        }
+        
+print ("#############NDF CPU################")
+start_time = timeit.default_timer()
+ndf_cpu3D = NDF(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'])  
+             
+rms = rmse(idealVol, ndf_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using NDF iterations'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Anisotropic Diffusion 4th Order (3D)____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of Diff4th regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy volume')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : Diff4th, \
+        'input' : noisyVol,\
+        'regularisation_parameter':3.5, \
+        'edge_parameter':0.02,\
+        'number_of_iterations' :300 ,\
+        'time_marching_parameter':0.0015
+        }
+        
+print ("#############Diff4th CPU################")
+start_time = timeit.default_timer()
+diff4th_cpu3D = Diff4th(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'cpu')
+             
+rms = rmse(idealVol, diff4th_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(diff4th_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using DIFF4th iterations'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________FGP-dTV (3D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of FGP-dTV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV,\
+        'input' : noisyVol,\
+        'refdata' : noisyRef,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :300 ,\
+        'tolerance_constant':0.00001,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP dTV CPU####################")
+start_time = timeit.default_timer()
+fgp_dTV_cpu3D = FGP_dTV(pars['input'],
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'],
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')
+             
+             
+rms = rmse(idealVol, fgp_dTV_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dTV_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using FGP-dTV'))
+#%%
diff --git a/docs/demos/demo_cpu_vs_gpu_regularisers.py b/docs/demos/demo_cpu_vs_gpu_regularisers.py
new file mode 100644
index 0000000..230a761
--- /dev/null
+++ b/docs/demos/demo_cpu_vs_gpu_regularisers.py
@@ -0,0 +1,790 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 22 11:39:43 2018
+
+Demonstration of CPU implementation against the GPU one
+
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import timeit
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
+from ccpi.filters.regularisers import PatchSelect
+from qualitymetrics import rmse
+###############################################################################
+def printParametersToString(pars):
+    """Return one 'key = value' line per entry of the parameter dictionary.
+
+    The 'algorithm' entry is shown by its __name__, while the 'input' and
+    'refdata' entries are shown by their shapes instead of their contents.
+    """
+    def show(key, value):
+        if key == 'algorithm':
+            return value.__name__
+        if key in ('input', 'refdata'):
+            return np.shape(value)
+        return value
+    return ''.join("{0} = {1}\n".format(k, show(k, v)) for k, v in pars.items())
+###############################################################################
+
+filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
+
+# read image
+Im = plt.imread(filename)                     
+Im = np.asarray(Im, dtype='float32')
+
+Im = Im/255
+perc = 0.05
+u0 = Im + np.random.normal(loc = 0 ,
+                                  scale = perc * Im , 
+                                  size = np.shape(Im))
+u_ref = Im + np.random.normal(loc = 0 ,
+                                  scale = 0.01 * Im , 
+                                  size = np.shape(Im))
+
+# (disabled) clamp negative values of u0 to zero, i.e. u0 -> max(u0, 0)
+# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+u0 = u0.astype('float32')
+u_ref = u_ref.astype('float32')
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________ROF-TV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of ROF-TV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm': ROF_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04,\
+        'number_of_iterations': 4500,\
+        'time_marching_parameter': 0.00002
+        }
+print ("#############ROF TV CPU####################")
+start_time = timeit.default_timer()
+rof_cpu = ROF_TV(pars['input'],
+             pars['regularisation_parameter'],
+             pars['number_of_iterations'],
+             pars['time_marching_parameter'],'cpu')
+rms = rmse(Im, rof_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+print ("##############ROF TV GPU##################")
+start_time = timeit.default_timer()
+rof_gpu = ROF_TV(pars['input'], 
+                     pars['regularisation_parameter'],
+                     pars['number_of_iterations'], 
+                     pars['time_marching_parameter'],'gpu')
+                     
+rms = rmse(Im, rof_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = ROF_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+# absolute CPU/GPU difference; pixels above tolerance are set to 1 and any such pixel counts as a mismatch
+diff_im = abs(rof_cpu - rof_gpu)
+diff_im[diff_im > tolerance] = 1
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if np.any(diff_im > tolerance):
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________FGP-TV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of FGP-TV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :1200 ,\
+        'tolerance_constant':0.00001,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP TV CPU####################")
+start_time = timeit.default_timer()
+fgp_cpu = FGP_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')  
+             
+             
+rms = rmse(Im, fgp_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+
+print ("##############FGP TV GPU##################")
+start_time = timeit.default_timer()
+fgp_gpu = FGP_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+                                   
+rms = rmse(Im, fgp_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = FGP_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+# absolute CPU/GPU difference; pixels above tolerance are set to 1 and any such pixel counts as a mismatch
+diff_im = abs(fgp_cpu - fgp_gpu)
+diff_im[diff_im > tolerance] = 1
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if np.any(diff_im > tolerance):
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________SB-TV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of SB-TV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :150 ,\
+        'tolerance_constant':1e-05,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############SB-TV CPU####################")
+start_time = timeit.default_timer()
+sb_cpu = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'cpu')  
+             
+             
+rms = rmse(Im, sb_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+
+print ("##############SB TV GPU##################")
+start_time = timeit.default_timer()
+sb_gpu = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'gpu')
+                                   
+rms = rmse(Im, sb_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = SB_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# bbox styling for the text box (matplotlib.patches.Patch properties)
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# anchor the text box at (0.15, 0.25) in axes coordinates (lower-left region)
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+# absolute CPU/GPU difference; pixels above tolerance are set to 1 and any such pixel counts as a mismatch
+diff_im = abs(sb_cpu - sb_gpu)
+diff_im[diff_im > tolerance] = 1
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if np.any(diff_im > tolerance):
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________TGV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of TGV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'alpha1':1.0,\
+        'alpha0':2.0,\
+        'number_of_iterations' :400 ,\
+        'LipshitzConstant' :12 ,\
+        }
+        
+print ("#############TGV CPU####################")
+start_time = timeit.default_timer()
+tgv_cpu = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'cpu')
+             
+rms = rmse(Im, tgv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+print ("##############TGV GPU##################")
+start_time = timeit.default_timer()
+tgv_gpu = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'gpu')
+                                   
+rms = rmse(Im, tgv_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = TGV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+# Binary mismatch mask: 1 where CPU and GPU differ by more than the tolerance,
+# 0 elsewhere, so sub-tolerance residue cannot add up to a false mismatch.
+diff_im = (abs(tgv_cpu - tgv_gpu) > tolerance).astype('float32')
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if diff_im.any():  # any pixel outside the tolerance counts as a mismatch
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________LLT-ROF bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of LLT-ROF regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : LLT_ROF, \
+        'input' : u0,\
+        'regularisation_parameterROF':0.04, \
+        'regularisation_parameterLLT':0.01, \
+        'number_of_iterations' :4500 ,\
+        'time_marching_parameter' :0.00002 ,\
+        }
+        
+print ("#############LLT- ROF CPU####################")
+start_time = timeit.default_timer()
+lltrof_cpu = LLT_ROF(pars['input'], 
+              pars['regularisation_parameterROF'],
+              pars['regularisation_parameterLLT'],
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'cpu')
+
+rms = rmse(Im, lltrof_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(lltrof_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+print ("#############LLT- ROF GPU####################")
+start_time = timeit.default_timer()
+lltrof_gpu = LLT_ROF(pars['input'], 
+              pars['regularisation_parameterROF'],
+              pars['regularisation_parameterLLT'],
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'gpu')
+
+rms = rmse(Im, lltrof_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = LLT_ROF
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(lltrof_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+# Binary mismatch mask: 1 where CPU and GPU differ by more than the tolerance,
+# 0 elsewhere, so sub-tolerance residue cannot add up to a false mismatch.
+diff_im = (abs(lltrof_cpu - lltrof_gpu) > tolerance).astype('float32')
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if diff_im.any():  # any pixel outside the tolerance counts as a mismatch
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________NDF bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of NDF regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF, \
+        'input' : u0,\
+        'regularisation_parameter':0.06, \
+        'edge_parameter':0.04,\
+        'number_of_iterations' :1000 ,\
+        'time_marching_parameter':0.025,\
+        'penalty_type':  1
+        }
+        
+print ("#############NDF CPU####################")
+start_time = timeit.default_timer()
+ndf_cpu = NDF(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'],'cpu')
+             
+rms = rmse(Im, ndf_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+
+print ("##############NDF GPU##################")
+start_time = timeit.default_timer()
+ndf_gpu = NDF(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'],'gpu')
+             
+rms = rmse(Im, ndf_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = NDF
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+# Binary mismatch mask: 1 where CPU and GPU differ by more than the tolerance,
+# 0 elsewhere, so sub-tolerance residue cannot add up to a false mismatch.
+diff_im = (abs(ndf_cpu - ndf_gpu) > tolerance).astype('float32')
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if diff_im.any():  # any pixel outside the tolerance counts as a mismatch
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Anisotropic Diffusion 4th Order (2D)____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of Diff4th regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : Diff4th, \
+        'input' : u0,\
+        'regularisation_parameter':3.5, \
+        'edge_parameter':0.02,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.001
+        }
+
+print ("#############Diff4th CPU####################")
+start_time = timeit.default_timer()
+diff4th_cpu = Diff4th(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'cpu')
+             
+rms = rmse(Im, diff4th_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(diff4th_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+print ("##############Diff4th GPU##################")
+start_time = timeit.default_timer()
+diff4th_gpu = Diff4th(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 'gpu')
+             
+rms = rmse(Im, diff4th_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = Diff4th
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(diff4th_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+# Binary mismatch mask: 1 where CPU and GPU differ by more than the tolerance,
+# 0 elsewhere, so sub-tolerance residue cannot add up to a false mismatch.
+diff_im = (abs(diff4th_cpu - diff4th_gpu) > tolerance).astype('float32')
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if diff_im.any():  # any pixel outside the tolerance counts as a mismatch
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________FGP-dTV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of FGP-dTV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV, \
+        'input' : u0,\
+        'refdata' : u_ref,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :1000 ,\
+        'tolerance_constant':1e-07,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP dTV CPU####################")
+start_time = timeit.default_timer()
+fgp_dtv_cpu = FGP_dTV(pars['input'], 
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')
+             
+             
+rms = rmse(Im, fgp_dtv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dtv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+print ("##############FGP dTV GPU##################")
+start_time = timeit.default_timer()
+fgp_dtv_gpu = FGP_dTV(pars['input'], 
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+rms = rmse(Im, fgp_dtv_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = FGP_dTV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dtv_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+# Binary mismatch mask: 1 where CPU and GPU differ by more than the tolerance,
+# 0 elsewhere, so sub-tolerance residue cannot add up to a false mismatch.
+diff_im = (abs(fgp_dtv_cpu - fgp_dtv_gpu) > tolerance).astype('float32')
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if diff_im.any():  # any pixel outside the tolerance counts as a mismatch
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____Non-local regularisation bench_________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of Nonlocal TV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+pars = {'algorithm' : PatchSelect, \
+        'input' : u0,\
+        'searchwindow': 7, \
+        'patchwindow': 2,\
+        'neighbours' : 15 ,\
+        'edge_parameter':0.18}
+
+print ("############## Nonlocal Patches on CPU##################")
+start_time = timeit.default_timer()
+H_i, H_j, WeightsCPU = PatchSelect(pars['input'], 
+              pars['searchwindow'],
+              pars['patchwindow'], 
+              pars['neighbours'],
+              pars['edge_parameter'],'cpu')
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+
+print ("############## Nonlocal Patches on GPU##################")
+# Start timing the GPU patch selection.
+start_time = timeit.default_timer()
+H_i, H_j, WeightsGPU = PatchSelect(pars['input'], 
+              pars['searchwindow'],
+              pars['patchwindow'], 
+              pars['neighbours'],
+              pars['edge_parameter'],'gpu')
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+# Binary mismatch mask over the first weight slice: 1 where CPU and GPU differ
+# by more than the tolerance, 0 elsewhere (no sub-tolerance residue is kept).
+diff_im = (abs(WeightsCPU[0,:,:] - WeightsGPU[0,:,:]) > tolerance).astype('float32')
+a=fig.add_subplot(1,2,2)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if diff_im.any():  # any pixel outside the tolerance counts as a mismatch
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+#%%
\ No newline at end of file
diff --git a/docs/demos/demo_gpu_regularisers.py b/docs/demos/demo_gpu_regularisers.py
new file mode 100644
index 0000000..e1c6575
--- /dev/null
+++ b/docs/demos/demo_gpu_regularisers.py
@@ -0,0 +1,518 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 22 11:39:43 2018
+
+Demonstration of GPU regularisers
+
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import timeit
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
+from ccpi.filters.regularisers import PatchSelect, NLTV
+from qualitymetrics import rmse
+###############################################################################
+def printParametersToString(pars):
+    """Render the parameter dict `pars` as text, one 'key = value' line each.
+
+    'algorithm' is shown by its __name__; 'input'/'refdata' by their np.shape.
+    """
+    rows = []
+    for name, val in pars.items():
+        if name == 'algorithm':
+            val = val.__name__
+        elif name in ('input', 'refdata'):
+            val = np.shape(val)
+        rows.append("{0} = {1}\n".format(name, val))
+    return ''.join(rows)
+###############################################################################
+#%%
+filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
+
+# read image
+Im = plt.imread(filename)                     
+Im = np.asarray(Im, dtype='float32')
+
+Im = Im/255
+perc = 0.05
+u0 = Im + np.random.normal(loc = 0 ,
+                                  scale = perc * Im , 
+                                  size = np.shape(Im))
+u_ref = Im + np.random.normal(loc = 0 ,
+                                  scale = 0.01 * Im , 
+                                  size = np.shape(Im))
+(N,M) = np.shape(u0)
+# optional clamp of negative u0 values to zero (u0 -> max(u0, 0)); currently disabled:
+# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+u0 = u0.astype('float32')
+u_ref = u_ref.astype('float32')
+"""
+M = M-100
+u_ref2 = np.zeros([N,M],dtype='float32')
+u_ref2[:,0:M] = u_ref[:,0:M]
+u_ref = u_ref2
+del u_ref2
+
+u02 = np.zeros([N,M],dtype='float32')
+u02[:,0:M] = u0[:,0:M]
+u0 = u02
+del u02
+
+Im2 = np.zeros([N,M],dtype='float32')
+Im2[:,0:M] = Im[:,0:M]
+Im = Im2
+del Im2
+"""
+#%%
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________ROF-TV regulariser_____________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of the ROF-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm': ROF_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04,\
+        'number_of_iterations': 1200,\
+        'time_marching_parameter': 0.0025
+        }
+print ("##############ROF TV GPU##################")
+start_time = timeit.default_timer()
+rof_gpu = ROF_TV(pars['input'], 
+                     pars['regularisation_parameter'],
+                     pars['number_of_iterations'], 
+                     pars['time_marching_parameter'],'gpu')
+                     
+rms = rmse(Im, rof_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = ROF_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________FGP-TV regulariser_____________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of the FGP-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :1200 ,\
+        'tolerance_constant':1e-06,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("##############FGP TV GPU##################")
+start_time = timeit.default_timer()
+fgp_gpu = FGP_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+                                   
+rms = rmse(Im, fgp_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = FGP_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________SB-TV regulariser______________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of the SB-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :150 ,\
+        'tolerance_constant':1e-06,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("##############SB TV GPU##################")
+start_time = timeit.default_timer()
+sb_gpu = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'gpu')
+                                   
+rms = rmse(Im, sb_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = SB_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+#%%
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_____Total Generalised Variation (2D)______")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of TGV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'alpha1':1.0,\
+        'alpha0':2.0,\
+        'number_of_iterations' :1250 ,\
+        'LipshitzConstant' :12 ,\
+        }
+        
+print ("##############TGV GPU##################")
+start_time = timeit.default_timer()
+tgv_gpu = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'gpu')
+             
+             
+rms = rmse(Im, tgv_gpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("______________LLT- ROF (2D)________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of LLT-ROF regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : LLT_ROF, \
+        'input' : u0,\
+        'regularisation_parameterROF':0.04, \
+        'regularisation_parameterLLT':0.01, \
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter' :0.0025 ,\
+        }
+        
+print ("#############LLT- ROF GPU####################")
+start_time = timeit.default_timer()
+lltrof_gpu = LLT_ROF(pars['input'], 
+              pars['regularisation_parameterROF'],
+              pars['regularisation_parameterLLT'],
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'gpu')
+             
+             
+rms = rmse(Im, lltrof_gpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(lltrof_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________NDF regulariser_____________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of the NDF regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF, \
+        'input' : u0,\
+        'regularisation_parameter':0.025, \
+        'edge_parameter':0.015,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.025,\
+        'penalty_type':  1
+        }
+
+print ("##############NDF GPU##################")
+start_time = timeit.default_timer()
+ndf_gpu = NDF(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'],'gpu')  
+             
+rms = rmse(Im, ndf_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = NDF
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Anisotropic Diffusion 4th Order (2D)____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of Diff4th regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : Diff4th, \
+        'input' : u0,\
+        'regularisation_parameter':3.5, \
+        'edge_parameter':0.02,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.0015
+        }
+        
+print ("#############DIFF4th GPU################")
+start_time = timeit.default_timer()
+diff4_gpu = Diff4th(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'gpu')
+             
+rms = rmse(Im, diff4_gpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(diff4_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Nonlocal patches pre-calculation____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+start_time = timeit.default_timer()
+# set parameters
+pars = {'algorithm' : PatchSelect, \
+        'input' : u0,\
+        'searchwindow': 7, \
+        'patchwindow': 2,\
+        'neighbours' : 15 ,\
+        'edge_parameter':0.18}
+
+H_i, H_j, Weights = PatchSelect(pars['input'], 
+              pars['searchwindow'],
+              pars['patchwindow'], 
+              pars['neighbours'],
+              pars['edge_parameter'],'gpu')
+              
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+"""
+plt.figure()
+plt.imshow(Weights[0,:,:],cmap="gray",interpolation="nearest",vmin=0, vmax=1)
+plt.show()
+"""
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Nonlocal Total Variation penalty____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of NLTV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+pars2 = {'algorithm' : NLTV, \
+        'input' : u0,\
+        'H_i': H_i, \
+        'H_j': H_j,\
+        'H_k' : 0,\
+        'Weights' : Weights,\
+        'regularisation_parameter': 0.02,\
+        'iterations': 3
+        }
+start_time = timeit.default_timer()
+nltv_cpu = NLTV(pars2['input'], 
+              pars2['H_i'],
+              pars2['H_j'], 
+              pars2['H_k'],
+              pars2['Weights'],
+              pars2['regularisation_parameter'],
+              pars2['iterations'])
+
+rms = rmse(Im, nltv_cpu)
+pars2['rmse'] = rms  # keep NLTV's error with NLTV's settings, not PatchSelect's
+shown = {k: v for k, v in pars2.items() if k not in ('H_i', 'H_j', 'Weights')}  # omit bulky arrays
+txtstr = printParametersToString(shown)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(nltv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________FGP-dTV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of the FGP-dTV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV, \
+        'input' : u0,\
+        'refdata' : u_ref,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :2000 ,\
+        'tolerance_constant':1e-06,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("##############FGP dTV GPU##################")
+start_time = timeit.default_timer()
+fgp_dtv_gpu = FGP_dTV(pars['input'], 
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+                                   
+rms = rmse(Im, fgp_dtv_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = FGP_dTV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dtv_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
diff --git a/docs/demos/demo_gpu_regularisers3D.py b/docs/demos/demo_gpu_regularisers3D.py
new file mode 100644
index 0000000..b6058d2
--- /dev/null
+++ b/docs/demos/demo_gpu_regularisers3D.py
@@ -0,0 +1,460 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 22 11:39:43 2018
+
+Demonstration of GPU regularisers
+
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import timeit
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
+from qualitymetrics import rmse
+###############################################################################
+def printParametersToString(pars):
+        txt = r''
+        for key, value in pars.items():
+            if key== 'algorithm' :
+                txt += "{0} = {1}".format(key, value.__name__)
+            elif key == 'input':
+                txt += "{0} = {1}".format(key, np.shape(value))
+            elif key == 'refdata':
+                txt += "{0} = {1}".format(key, np.shape(value))
+            else:
+                txt += "{0} = {1}".format(key, value)
+            txt += '\n'
+        return txt
+###############################################################################
+#%%
+filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
+
+# read the test image and convert it to float32
+Im = plt.imread(filename)                     
+Im = np.asarray(Im, dtype='float32')
+# rescale by 255 (assumes 8-bit grey levels -- TODO confirm)
+Im = Im/255
+perc = 0.05
+u0 = Im + np.random.normal(loc = 0 ,
+                                  scale = perc * Im , 
+                                  size = np.shape(Im))
+u_ref = Im + np.random.normal(loc = 0 ,
+                                  scale = 0.01 * Im , 
+                                  size = np.shape(Im))
+(N,M) = np.shape(u0)  # image size, reused below to allocate the 3D volumes
+# the sums above are float64 (np.random.normal draws float64 samples),
+# so cast the noisy images back to float32
+u0 = u0.astype('float32')
+u_ref = u_ref.astype('float32')
+"""
+M = M-100
+u_ref2 = np.zeros([N,M],dtype='float32')
+u_ref2[:,0:M] = u_ref[:,0:M]
+u_ref = u_ref2
+del u_ref2
+
+u02 = np.zeros([N,M],dtype='float32')
+u02[:,0:M] = u0[:,0:M]
+u0 = u02
+del u02
+
+Im2 = np.zeros([N,M],dtype='float32')
+Im2[:,0:M] = Im[:,0:M]
+Im = Im2
+del Im2
+"""
+
+
+slices = 20
+
+filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
+Im = plt.imread(filename)
+Im = np.asarray(Im, dtype='float32')
+
+Im = Im/255
+perc = 0.05
+
+noisyVol = np.zeros((slices,N,N),dtype='float32')
+noisyRef = np.zeros((slices,N,N),dtype='float32')
+idealVol = np.zeros((slices,N,N),dtype='float32')
+
+for i in range (slices):
+    noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
+    noisyRef[i,:,:] = Im + np.random.normal(loc = 0 , scale = 0.01 * Im , size = np.shape(Im))
+    idealVol[i,:,:] = Im
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________ROF-TV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of ROF-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy 15th slice of a volume')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm': ROF_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04,\
+        'number_of_iterations': 500,\
+        'time_marching_parameter': 0.0025        
+        }
+print ("#############ROF TV GPU####################")
+start_time = timeit.default_timer()
+rof_gpu3D = ROF_TV(pars['input'],
+             pars['regularisation_parameter'],
+             pars['number_of_iterations'],
+             pars['time_marching_parameter'],'gpu')
+rms = rmse(idealVol, rof_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using ROF-TV'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________FGP-TV (3D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of FGP-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :300 ,\
+        'tolerance_constant':0.00001,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("#############FGP TV GPU####################")
+start_time = timeit.default_timer()
+fgp_gpu3D = FGP_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+
+rms = rmse(idealVol, fgp_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using FGP-TV'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________SB-TV (3D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of SB-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :100 ,\
+        'tolerance_constant':1e-05,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("#############SB TV GPU####################")
+start_time = timeit.default_timer()
+sb_gpu3D = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'gpu')
+
+rms = rmse(idealVol, sb_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using SB-TV'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________LLT-ROF (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of LLT-ROF regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : LLT_ROF, \
+        'input' : noisyVol,\
+        'regularisation_parameterROF':0.04, \
+        'regularisation_parameterLLT':0.015, \
+        'number_of_iterations' :300 ,\
+        'time_marching_parameter' :0.0025 ,\
+        }
+
+print ("#############LLT ROF CPU####################")
+start_time = timeit.default_timer()
+lltrof_gpu3D = LLT_ROF(pars['input'], 
+              pars['regularisation_parameterROF'],
+              pars['regularisation_parameterLLT'],
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'gpu')
+
+rms = rmse(idealVol, lltrof_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(lltrof_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using LLT-ROF'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________TGV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of TGV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'alpha1':1.0,\
+        'alpha0':2.0,\
+        'number_of_iterations' :600 ,\
+        'LipshitzConstant' :12 ,\
+        }
+
+print ("#############TGV GPU####################")
+start_time = timeit.default_timer()
+tgv_gpu3D = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'gpu')
+             
+
+rms = rmse(idealVol, tgv_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using TGV'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________NDF-TV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of NDF regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.025, \
+        'edge_parameter':0.015,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.025,\
+        'penalty_type':  1
+        }
+
+print ("#############NDF GPU####################")
+start_time = timeit.default_timer()
+ndf_gpu3D = NDF(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'],'gpu')
+
+rms = rmse(idealVol, ndf_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using NDF'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Anisotropic Diffusion 4th Order (3D)____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of DIFF4th regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : Diff4th, \
+        'input' : noisyVol,\
+        'regularisation_parameter':3.5, \
+        'edge_parameter':0.02,\
+        'number_of_iterations' :300 ,\
+        'time_marching_parameter':0.0015
+        }
+        
+print ("#############DIFF4th CPU################")
+start_time = timeit.default_timer()
+diff4_gpu3D = Diff4th(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'gpu')
+             
+rms = rmse(idealVol, diff4_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(diff4_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________FGP-dTV (3D)________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of FGP-dTV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV, \
+        'input' : noisyVol,\
+        'refdata' : noisyRef,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :300 ,\
+        'tolerance_constant':0.00001,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("#############FGP TV GPU####################")
+start_time = timeit.default_timer()
+fgp_dTV_gpu3D = FGP_dTV(pars['input'],
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'],
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+
+rms = rmse(idealVol, fgp_dTV_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dTV_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using FGP-dTV'))
+#%%
diff --git a/docs/demos/qualitymetrics.py b/docs/demos/qualitymetrics.py
new file mode 100644
index 0000000..850829e
--- /dev/null
+++ b/docs/demos/qualitymetrics.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Feb 21 13:34:32 2018
+# quality metrics
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+import numpy as np
+
+def nrmse(im1, im2):
+    rmse = np.sqrt(np.sum((im2 - im1) ** 2) / float(im1.size))
+    max_val = max(np.max(im1), np.max(im2))
+    min_val = min(np.min(im1), np.min(im2))
+    return 1 - (rmse / (max_val - min_val))
+    
+def rmse(im1, im2):
+    rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size))
+    return rmse
diff --git a/recipe/bld.bat b/recipe/bld.bat
new file mode 100644
index 0000000..6c84355
--- /dev/null
+++ b/recipe/bld.bat
@@ -0,0 +1,20 @@
+IF NOT DEFINED CIL_VERSION (
+ECHO CIL_VERSION Not Defined.
+exit 1
+)
+
+:: stage the sources the same way build.sh does; the recipe now sits in <repo>\recipe
+mkdir "%SRC_DIR%\ccpi"
+ROBOCOPY /E "%RECIPE_DIR%\..\src\Matlab" "%SRC_DIR%\ccpi\Matlab"
+ROBOCOPY /E "%RECIPE_DIR%\..\src\Python" "%SRC_DIR%\ccpi\Python"
+ROBOCOPY /E "%RECIPE_DIR%\..\src\Core" "%SRC_DIR%\Core"
+cd %SRC_DIR%
+
+:: configure from the repository root (one level above the recipe directory)
+cmake -G "NMake Makefiles" %RECIPE_DIR%\..\ -DBUILD_PYTHON_WRAPPERS=ON -DCONDA_BUILD=ON -DBUILD_CUDA=OFF -DCMAKE_BUILD_TYPE="Release" -DLIBRARY_LIB="%CONDA_PREFIX%\lib" -DLIBRARY_INC="%CONDA_PREFIX%" -DCMAKE_INSTALL_PREFIX="%PREFIX%\Library"
+if errorlevel 1 exit 1
+
+:: NOTE(review): build.sh passes -DBUILD_PYTHON_WRAPPER (singular) -- confirm which
+:: spelling CMakeLists.txt actually defines and align the two scripts
+nmake install
+if errorlevel 1 exit 1
\ No newline at end of file
diff --git a/recipe/build.sh b/recipe/build.sh
new file mode 100644
index 0000000..1d54b6f
--- /dev/null
+++ b/recipe/build.sh
@@ -0,0 +1,18 @@
+
+mkdir "$SRC_DIR/ccpi"
+cp -rv "$RECIPE_DIR/../src/Matlab" "$SRC_DIR/ccpi"
+cp -rv "$RECIPE_DIR/../src/Python" "$SRC_DIR/ccpi"
+cp -rv "$RECIPE_DIR/../src/Core" "$SRC_DIR/Core"
+
+cd $SRC_DIR
+##cuda=off
+
+cmake -G "Unix Makefiles" $RECIPE_DIR/../ -DBUILD_PYTHON_WRAPPER=ON -DCONDA_BUILD=ON -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE="Release" -DLIBRARY_LIB=$CONDA_PREFIX/lib -DLIBRARY_INC=$CONDA_PREFIX -DCMAKE_INSTALL_PREFIX=$PREFIX
+
+
+make install
+
+#$PYTHON setup-regularisers.py build_ext
+#$PYTHON setup-regularisers.py install
+
+
diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml
new file mode 100644
index 0000000..fbe82dc
--- /dev/null
+++ b/recipe/conda_build_config.yaml
@@ -0,0 +1,9 @@
+python:
+  - 2.7 # [not win]
+  - 3.5
+  - 3.6
+#  - 3.7
+numpy:
+  - 1.12
+  - 1.14
+  - 1.15
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
new file mode 100644
index 0000000..7435b2b
--- /dev/null
+++ b/recipe/meta.yaml
@@ -0,0 +1,40 @@
+package:
+  name: ccpi-regulariser
+  version: {{CIL_VERSION}}
+  
+build:
+  preserve_egg_dir: False
+  number: 0
+  script_env:
+    - CIL_VERSION
+  
+test:
+  files:
+    - lena_gray_512.tif
+  requires:
+    - pillow=4.1.1
+
+requirements:
+  build:
+    - python
+    - numpy {{ numpy }}
+    - setuptools
+    - cython
+    - vc 14 # [win and py36] 
+    - vc 14 # [win and py35] 
+    - vc 9  # [win and py27]
+    - cmake 
+
+  run:
+    - {{ pin_compatible('numpy', max_pin='x.x') }}
+    - python
+    - numpy
+    - vc 14 # [win and py36]
+    - vc 14 # [win and py35]
+    - vc 9  # [win and py27]
+    - libgcc-ng # [linux]
+
+about:
+  home: http://www.ccpi.ac.uk
+  license:  BSD license
+  summary: 'CCPi Core Imaging Library Quantification Toolbox'
diff --git a/recipe/run_test.py b/recipe/run_test.py
new file mode 100755
index 0000000..21f3216
--- /dev/null
+++ b/recipe/run_test.py
@@ -0,0 +1,819 @@
+import unittest
+import numpy as np
+import os
+import timeit
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
+from PIL import Image
+
+class TiffReader(object):  # the tests bind an instance to the name `plt` and only call imread on it
+    def imread(self, filename):
+        return np.asarray(Image.open(filename))  # open the file with PIL and hand it to numpy
+###############################################################################
+def printParametersToString(pars):
+        txt = r''
+        for key, value in pars.items():
+            if key== 'algorithm' :
+                txt += "{0} = {1}".format(key, value.__name__)
+            elif key == 'input':
+                txt += "{0} = {1}".format(key, np.shape(value))
+            elif key == 'refdata':
+                txt += "{0} = {1}".format(key, np.shape(value))
+            else:
+                txt += "{0} = {1}".format(key, value)
+            txt += '\n'
+        return txt
+def nrmse(im1, im2):
+    rmse = np.sqrt(np.sum((im2 - im1) ** 2) / float(im1.size))
+    max_val = max(np.max(im1), np.max(im2))
+    min_val = min(np.min(im1), np.min(im2))
+    return 1 - (rmse / (max_val - min_val))
+    
+def rmse(im1, im2):
+    rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size))
+    return rmse
+###############################################################################
+
+class TestRegularisers(unittest.TestCase):
+    
+
+    def test_ROF_TV_CPU_vs_GPU(self):
+        #print ("tomas debug test function")
+        print(__name__)
+        filename = os.path.join("lena_gray_512.tif")
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)                     
+        Im = np.asarray(Im, dtype='float32')
+        
+        Im = Im/255
+        perc = 0.05
+        u0 = Im + np.random.normal(loc = 0 ,
+                                          scale = perc * Im , 
+                                          size = np.shape(Im))
+        u_ref = Im + np.random.normal(loc = 0 ,
+                                          scale = 0.01 * Im , 
+                                          size = np.shape(Im))
+        
+        # map the u0 u0->u0>0
+        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+        u0 = u0.astype('float32')
+        u_ref = u_ref.astype('float32')
+        
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("____________ROF-TV bench___________________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        
+        # set parameters
+        pars = {'algorithm': ROF_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04,\
+        'number_of_iterations': 2500,\
+        'time_marching_parameter': 0.00002
+        }
+        print ("#############ROF TV CPU####################")
+        start_time = timeit.default_timer()
+        rof_cpu = ROF_TV(pars['input'],
+                     pars['regularisation_parameter'],
+                     pars['number_of_iterations'],
+                     pars['time_marching_parameter'],'cpu')
+        rms = rmse(Im, rof_cpu)
+        pars['rmse'] = rms
+        
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        print ("##############ROF TV GPU##################")
+        start_time = timeit.default_timer()
+        try:
+            rof_gpu = ROF_TV(pars['input'], 
+                             pars['regularisation_parameter'],
+                             pars['number_of_iterations'], 
+                             pars['time_marching_parameter'],'gpu')
+        except ValueError as ve:
+            self.skipTest("Results not comparable. GPU computing error.")
+
+        rms = rmse(Im, rof_gpu)
+        pars['rmse'] = rms
+        pars['algorithm'] = ROF_TV
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        print ("--------Compare the results--------")
+        tolerance = 1e-04
+        diff_im = np.zeros(np.shape(rof_cpu))
+        diff_im = abs(rof_cpu - rof_gpu)
+        diff_im[diff_im > tolerance] = 1
+        self.assertLessEqual(diff_im.sum() , 1)
+        
+    def test_FGP_TV_CPU_vs_GPU(self):
+        print(__name__)
+        filename = os.path.join("lena_gray_512.tif")
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)                     
+        Im = np.asarray(Im, dtype='float32')
+        
+        Im = Im/255
+        perc = 0.05
+        u0 = Im + np.random.normal(loc = 0 ,
+                                          scale = perc * Im , 
+                                          size = np.shape(Im))
+        u_ref = Im + np.random.normal(loc = 0 ,
+                                          scale = 0.01 * Im , 
+                                          size = np.shape(Im))
+        
+        # map the u0 u0->u0>0
+        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+        u0 = u0.astype('float32')
+        u_ref = u_ref.astype('float32')
+        
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("____________FGP-TV bench___________________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        
+        
+        # set parameters
+        pars = {'algorithm' : FGP_TV, \
+                'input' : u0,\
+                'regularisation_parameter':0.04, \
+                'number_of_iterations' :1200 ,\
+                'tolerance_constant':0.00001,\
+                'methodTV': 0 ,\
+                'nonneg': 0 ,\
+                'printingOut': 0 
+                }
+                
+        print ("#############FGP TV CPU####################")
+        start_time = timeit.default_timer()
+        fgp_cpu = FGP_TV(pars['input'], 
+                      pars['regularisation_parameter'],
+                      pars['number_of_iterations'],
+                      pars['tolerance_constant'], 
+                      pars['methodTV'],
+                      pars['nonneg'],
+                      pars['printingOut'],'cpu')  
+                     
+                     
+        rms = rmse(Im, fgp_cpu)
+        pars['rmse'] = rms
+        
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        
+        print ("##############FGP TV GPU##################")
+        start_time = timeit.default_timer()
+        try:
+            fgp_gpu = FGP_TV(pars['input'], 
+                      pars['regularisation_parameter'],
+                      pars['number_of_iterations'],
+                      pars['tolerance_constant'], 
+                      pars['methodTV'],
+                      pars['nonneg'],
+                      pars['printingOut'],'gpu')
+
+        except ValueError as ve:
+            self.skipTest("Results not comparable. GPU computing error.")
+
+        rms = rmse(Im, fgp_gpu)
+        pars['rmse'] = rms
+        pars['algorithm'] = FGP_TV
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        
+        print ("--------Compare the results--------")
+        tolerance = 1e-05
+        diff_im = np.zeros(np.shape(fgp_cpu))
+        diff_im = abs(fgp_cpu - fgp_gpu)
+        diff_im[diff_im > tolerance] = 1
+
+        self.assertLessEqual(diff_im.sum() , 1)
+
+    def test_SB_TV_CPU_vs_GPU(self):
+        print(__name__)
+        filename = os.path.join("lena_gray_512.tif")
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)                     
+        Im = np.asarray(Im, dtype='float32')
+        
+        Im = Im/255
+        perc = 0.05
+        u0 = Im + np.random.normal(loc = 0 ,
+                                          scale = perc * Im , 
+                                          size = np.shape(Im))
+        u_ref = Im + np.random.normal(loc = 0 ,
+                                          scale = 0.01 * Im , 
+                                          size = np.shape(Im))
+        
+        # map the u0 u0->u0>0
+        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+        u0 = u0.astype('float32')
+        u_ref = u_ref.astype('float32')
+        
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("____________SB-TV bench___________________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        
+        
+        # set parameters
+        pars = {'algorithm' : SB_TV, \
+                'input' : u0,\
+                'regularisation_parameter':0.04, \
+                'number_of_iterations' :150 ,\
+                'tolerance_constant':1e-05,\
+                'methodTV': 0 ,\
+                'printingOut': 0 
+                }
+                
+        print ("#############SB-TV CPU####################")
+        start_time = timeit.default_timer()
+        sb_cpu = SB_TV(pars['input'], 
+                      pars['regularisation_parameter'],
+                      pars['number_of_iterations'],
+                      pars['tolerance_constant'], 
+                      pars['methodTV'],
+                      pars['printingOut'],'cpu')  
+                     
+                     
+        rms = rmse(Im, sb_cpu)
+        pars['rmse'] = rms
+        
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        
+        print ("##############SB TV GPU##################")
+        start_time = timeit.default_timer()
+        try:
+            
+            sb_gpu = SB_TV(pars['input'], 
+                      pars['regularisation_parameter'],
+                      pars['number_of_iterations'],
+                      pars['tolerance_constant'], 
+                      pars['methodTV'],
+                      pars['printingOut'],'gpu')
+
+        except ValueError as ve:
+            self.skipTest("Results not comparable. GPU computing error.")
+
+        rms = rmse(Im, sb_gpu)
+        pars['rmse'] = rms
+        pars['algorithm'] = SB_TV
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        print ("--------Compare the results--------")
+        tolerance = 1e-05
+        diff_im = np.zeros(np.shape(sb_cpu))
+        diff_im = abs(sb_cpu - sb_gpu)
+        diff_im[diff_im > tolerance] = 1
+        self.assertLessEqual(diff_im.sum(), 1)
+
+    def test_TGV_CPU_vs_GPU(self):
+        print(__name__)
+        filename = os.path.join("lena_gray_512.tif")
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)                     
+        Im = np.asarray(Im, dtype='float32')
+        
+        Im = Im/255
+        perc = 0.05
+        u0 = Im + np.random.normal(loc = 0 ,
+                                          scale = perc * Im , 
+                                          size = np.shape(Im))
+        u_ref = Im + np.random.normal(loc = 0 ,
+                                          scale = 0.01 * Im , 
+                                          size = np.shape(Im))
+        
+        # map the u0 u0->u0>0
+        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+        u0 = u0.astype('float32')
+        u_ref = u_ref.astype('float32')
+        
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("____________TGV bench___________________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        
+        
+        # set parameters
+        pars = {'algorithm' : TGV, \
+                'input' : u0,\
+                'regularisation_parameter':0.04, \
+                'alpha1':1.0,\
+                'alpha0':2.0,\
+                'number_of_iterations' :250 ,\
+                'LipshitzConstant' :12 ,\
+                }
+                
+        print ("#############TGV CPU####################")
+        start_time = timeit.default_timer()
+        tgv_cpu = TGV(pars['input'], 
+                      pars['regularisation_parameter'],
+                      pars['alpha1'],
+                      pars['alpha0'],
+                      pars['number_of_iterations'],
+                      pars['LipshitzConstant'],'cpu')
+                     
+        rms = rmse(Im, tgv_cpu)
+        pars['rmse'] = rms
+        
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        
+        print ("##############TGV GPU##################")
+        start_time = timeit.default_timer()
+        try:
+            tgv_gpu = TGV(pars['input'], 
+                      pars['regularisation_parameter'],
+                      pars['alpha1'],
+                      pars['alpha0'],
+                      pars['number_of_iterations'],
+                      pars['LipshitzConstant'],'gpu')
+
+        except ValueError as ve:
+            self.skipTest("Results not comparable. GPU computing error.")
+
+        rms = rmse(Im, tgv_gpu)
+        pars['rmse'] = rms
+        pars['algorithm'] = TGV
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        print ("--------Compare the results--------")
+        tolerance = 1e-05
+        diff_im = np.zeros(np.shape(tgv_gpu))
+        diff_im = abs(tgv_cpu - tgv_gpu)
+        diff_im[diff_im > tolerance] = 1
+        self.assertLessEqual(diff_im.sum() , 1)
+
+    def test_LLT_ROF_CPU_vs_GPU(self):
+        print(__name__)
+        filename = os.path.join("lena_gray_512.tif")
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)                     
+        Im = np.asarray(Im, dtype='float32')
+        
+        Im = Im/255
+        perc = 0.05
+        u0 = Im + np.random.normal(loc = 0 ,
+                                          scale = perc * Im , 
+                                          size = np.shape(Im))
+        u_ref = Im + np.random.normal(loc = 0 ,
+                                          scale = 0.01 * Im , 
+                                          size = np.shape(Im))
+        
+        # map the u0 u0->u0>0
+        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+        u0 = u0.astype('float32')
+        u_ref = u_ref.astype('float32')
+        
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("____________LLT-ROF bench___________________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        
+        
+        # set parameters
+        pars = {'algorithm' : LLT_ROF, \
+                'input' : u0,\
+                'regularisation_parameterROF':0.04, \
+                'regularisation_parameterLLT':0.01, \
+                'number_of_iterations' :1000 ,\
+                'time_marching_parameter' :0.0001 ,\
+                }
+                
+        print ("#############LLT- ROF CPU####################")
+        start_time = timeit.default_timer()
+        lltrof_cpu = LLT_ROF(pars['input'], 
+                      pars['regularisation_parameterROF'],
+                      pars['regularisation_parameterLLT'],
+                      pars['number_of_iterations'],
+                      pars['time_marching_parameter'],'cpu')
+        
+        rms = rmse(Im, lltrof_cpu)
+        pars['rmse'] = rms
+        
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        print ("#############LLT- ROF GPU####################")
+        start_time = timeit.default_timer()
+        try:
+            lltrof_gpu = LLT_ROF(pars['input'], 
+                      pars['regularisation_parameterROF'],
+                      pars['regularisation_parameterLLT'],
+                      pars['number_of_iterations'],
+                      pars['time_marching_parameter'],'gpu')
+        
+        except ValueError as ve:
+            self.skipTest("Results not comparable. GPU computing error.")
+
+        rms = rmse(Im, lltrof_gpu)
+        pars['rmse'] = rms
+        pars['algorithm'] = LLT_ROF
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        print ("--------Compare the results--------")
+        tolerance = 1e-04
+        diff_im = np.zeros(np.shape(lltrof_gpu))
+        diff_im = abs(lltrof_cpu - lltrof_gpu)
+        diff_im[diff_im > tolerance] = 1
+        self.assertLessEqual(diff_im.sum(), 1)
+
+    def test_NDF_CPU_vs_GPU(self):
+        print(__name__)
+        filename = os.path.join("lena_gray_512.tif")
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)                     
+        Im = np.asarray(Im, dtype='float32')
+        
+        Im = Im/255
+        perc = 0.05
+        u0 = Im + np.random.normal(loc = 0 ,
+                                          scale = perc * Im , 
+                                          size = np.shape(Im))
+        u_ref = Im + np.random.normal(loc = 0 ,
+                                          scale = 0.01 * Im , 
+                                          size = np.shape(Im))
+        
+        # map the u0 u0->u0>0
+        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+        u0 = u0.astype('float32')
+        u_ref = u_ref.astype('float32')
+        
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("_______________NDF bench___________________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        
+        
+        # set parameters
+        pars = {'algorithm' : NDF, \
+                'input' : u0,\
+                'regularisation_parameter':0.06, \
+                'edge_parameter':0.04,\
+                'number_of_iterations' :1000 ,\
+                'time_marching_parameter':0.025,\
+                'penalty_type':  1
+                }
+                
+        print ("#############NDF CPU####################")
+        start_time = timeit.default_timer()
+        ndf_cpu = NDF(pars['input'], 
+                      pars['regularisation_parameter'],
+                      pars['edge_parameter'], 
+                      pars['number_of_iterations'],
+                      pars['time_marching_parameter'], 
+                      pars['penalty_type'],'cpu')
+                     
+        rms = rmse(Im, ndf_cpu)
+        pars['rmse'] = rms
+        
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        
+        print ("##############NDF GPU##################")
+        start_time = timeit.default_timer()
+        try:
+            ndf_gpu = NDF(pars['input'], 
+                      pars['regularisation_parameter'],
+                      pars['edge_parameter'], 
+                      pars['number_of_iterations'],
+                      pars['time_marching_parameter'], 
+                      pars['penalty_type'],'gpu')
+                     
+        except ValueError as ve:
+            self.skipTest("Results not comparable. GPU computing error.")
+        rms = rmse(Im, ndf_gpu)
+        pars['rmse'] = rms
+        pars['algorithm'] = NDF
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        print ("--------Compare the results--------")
+        tolerance = 1e-05
+        diff_im = np.zeros(np.shape(ndf_cpu))
+        diff_im = abs(ndf_cpu - ndf_gpu)
+        diff_im[diff_im > tolerance] = 1
+        self.assertLessEqual(diff_im.sum(), 1)
+
+        
+    def test_Diff4th_CPU_vs_GPU(self):
+        filename = os.path.join("lena_gray_512.tif")
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)                     
+        Im = np.asarray(Im, dtype='float32')
+        
+        Im = Im/255
+        perc = 0.05
+        u0 = Im + np.random.normal(loc = 0 ,
+                                          scale = perc * Im , 
+                                          size = np.shape(Im))
+        u_ref = Im + np.random.normal(loc = 0 ,
+                                          scale = 0.01 * Im , 
+                                          size = np.shape(Im))
+        
+        # map the u0 u0->u0>0
+        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+        u0 = u0.astype('float32')
+        u_ref = u_ref.astype('float32')
+        
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("___Anisotropic Diffusion 4th Order (2D)____")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        
+        # set parameters
+        pars = {'algorithm' : Diff4th, \
+        'input' : u0,\
+        'regularisation_parameter':3.5, \
+        'edge_parameter':0.02,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.001
+        }
+        
+        print ("#############Diff4th CPU####################")
+        start_time = timeit.default_timer()
+        diff4th_cpu = Diff4th(pars['input'], 
+                      pars['regularisation_parameter'],
+                      pars['edge_parameter'], 
+                      pars['number_of_iterations'],
+                      pars['time_marching_parameter'],'cpu')
+                     
+        rms = rmse(Im, diff4th_cpu)
+        pars['rmse'] = rms
+
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        print ("##############Diff4th GPU##################")
+        start_time = timeit.default_timer()
+        try:
+            diff4th_gpu = Diff4th(pars['input'], 
+                      pars['regularisation_parameter'],
+                      pars['edge_parameter'], 
+                      pars['number_of_iterations'],
+                      pars['time_marching_parameter'], 'gpu')
+                     
+        except ValueError as ve:
+            self.skipTest("Results not comparable. GPU computing error.")
+        rms = rmse(Im, diff4th_gpu)
+        pars['rmse'] = rms
+        pars['algorithm'] = Diff4th
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        print ("--------Compare the results--------")
+        tolerance = 1e-05
+        diff_im = np.zeros(np.shape(diff4th_cpu))
+        diff_im = abs(diff4th_cpu - diff4th_gpu)
+        diff_im[diff_im > tolerance] = 1
+        self.assertLessEqual(diff_im.sum() , 1)
+
+    def test_FDGdTV_CPU_vs_GPU(self):
+        filename = os.path.join("lena_gray_512.tif")
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)                     
+        Im = np.asarray(Im, dtype='float32')
+        
+        Im = Im/255
+        perc = 0.05
+        u0 = Im + np.random.normal(loc = 0 ,
+                                          scale = perc * Im , 
+                                          size = np.shape(Im))
+        u_ref = Im + np.random.normal(loc = 0 ,
+                                          scale = 0.01 * Im , 
+                                          size = np.shape(Im))
+        
+        # map the u0 u0->u0>0
+        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+        u0 = u0.astype('float32')
+        u_ref = u_ref.astype('float32')
+        
+
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("____________FGP-dTV bench___________________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        
+        # set parameters
+        pars = {'algorithm' : FGP_dTV, \
+                'input' : u0,\
+                'refdata' : u_ref,\
+                'regularisation_parameter':0.04, \
+                'number_of_iterations' :1000 ,\
+                'tolerance_constant':1e-07,\
+                'eta_const':0.2,\
+                'methodTV': 0 ,\
+                'nonneg': 0 ,\
+                'printingOut': 0 
+                }
+                
+        print ("#############FGP dTV CPU####################")
+        start_time = timeit.default_timer()
+        fgp_dtv_cpu = FGP_dTV(pars['input'], 
+                      pars['refdata'], 
+                      pars['regularisation_parameter'],
+                      pars['number_of_iterations'],
+                      pars['tolerance_constant'], 
+                      pars['eta_const'], 
+                      pars['methodTV'],
+                      pars['nonneg'],
+                      pars['printingOut'],'cpu')
+                     
+                     
+        rms = rmse(Im, fgp_dtv_cpu)
+        pars['rmse'] = rms
+        
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        print ("##############FGP dTV GPU##################")
+        start_time = timeit.default_timer()
+        try:
+            fgp_dtv_gpu = FGP_dTV(pars['input'], 
+                      pars['refdata'], 
+                      pars['regularisation_parameter'],
+                      pars['number_of_iterations'],
+                      pars['tolerance_constant'], 
+                      pars['eta_const'], 
+                      pars['methodTV'],
+                      pars['nonneg'],
+                      pars['printingOut'],'gpu')
+        except ValueError as ve:
+            self.skipTest("Results not comparable. GPU computing error.")
+        rms = rmse(Im, fgp_dtv_gpu)
+        pars['rmse'] = rms
+        pars['algorithm'] = FGP_dTV
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+        print (txtstr)
+        print ("--------Compare the results--------")
+        tolerance = 1e-05
+        diff_im = np.zeros(np.shape(fgp_dtv_cpu))
+        diff_im = abs(fgp_dtv_cpu - fgp_dtv_gpu)
+        diff_im[diff_im > tolerance] = 1
+        self.assertLessEqual(diff_im.sum(), 1)
+
+    def test_cpu_ROF_TV(self):
+        #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy")
+        
+        filename = os.path.join("lena_gray_512.tif")
+
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)                     
+        Im = np.asarray(Im, dtype='float32')
+        Im = Im/255
+        
+        """
+        # read noiseless image
+        Im = plt.imread(filename)
+        Im = np.asarray(Im, dtype='float32')
+        """
+        tolerance = 1e-05
+        rms_rof_exp = 8.313131464999238e-05 #expected value for ROF model
+
+        # set parameters for ROF-TV
+        pars_rof_tv = {'algorithm': ROF_TV, \
+                            'input' : Im,\
+                            'regularisation_parameter':0.04,\
+                            'number_of_iterations': 50,\
+                            'time_marching_parameter': 0.00001
+                            }
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("_________testing ROF-TV (2D, CPU)__________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        rof_cpu = ROF_TV(pars_rof_tv['input'],
+             pars_rof_tv['regularisation_parameter'],
+             pars_rof_tv['number_of_iterations'],
+             pars_rof_tv['time_marching_parameter'],'cpu')
+        rms_rof = rmse(Im, rof_cpu)
+        
+        # now compare obtained rms with the expected value
+        self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance)
+    def test_cpu_FGP_TV(self):
+        #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy")
+        
+        filename = os.path.join("lena_gray_512.tif")
+
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)                     
+        Im = np.asarray(Im, dtype='float32')
+        Im = Im/255
+        """
+        # read noiseless image
+        Im = plt.imread(filename)
+        Im = np.asarray(Im, dtype='float32')
+        """
+        tolerance = 1e-05
+        rms_fgp_exp = 0.019152347 #expected value for FGP model
+        
+        pars_fgp_tv = {'algorithm' : FGP_TV, \
+                            'input' : Im,\
+                            'regularisation_parameter':0.04, \
+                            'number_of_iterations' :50 ,\
+                            'tolerance_constant':1e-06,\
+                            'methodTV': 0 ,\
+                            'nonneg': 0 ,\
+                            'printingOut': 0 
+                            }
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("_________testing FGP-TV (2D, CPU)__________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        fgp_cpu = FGP_TV(pars_fgp_tv['input'], 
+              pars_fgp_tv['regularisation_parameter'],
+              pars_fgp_tv['number_of_iterations'],
+              pars_fgp_tv['tolerance_constant'], 
+              pars_fgp_tv['methodTV'],
+              pars_fgp_tv['nonneg'],
+              pars_fgp_tv['printingOut'],'cpu')  
+        rms_fgp = rmse(Im, fgp_cpu)
+        # now compare obtained rms with the expected value
+        self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance)
+
+    def test_gpu_ROF(self):
+        #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy")
+        filename = os.path.join("lena_gray_512.tif")
+
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)
+        Im = np.asarray(Im, dtype='float32')
+        Im = Im/255
+        
+        tolerance = 1e-05
+        rms_rof_exp = 8.313131464999238e-05 #expected value for ROF model
+        
+        # set parameters for ROF-TV
+        pars_rof_tv = {'algorithm': ROF_TV, \
+                            'input' : Im,\
+                            'regularisation_parameter':0.04,\
+                            'number_of_iterations': 50,\
+                            'time_marching_parameter': 0.00001
+                            }
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("_________testing ROF-TV (2D, GPU)__________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        try:
+            rof_gpu = ROF_TV(pars_rof_tv['input'],
+             pars_rof_tv['regularisation_parameter'],
+             pars_rof_tv['number_of_iterations'],
+             pars_rof_tv['time_marching_parameter'],'gpu')
+        except ValueError as ve:
+            self.skipTest("Results not comparable. GPU computing error.")
+
+        rms_rof = rmse(Im, rof_gpu)
+        # now compare obtained rms with the expected value
+        self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance)
+    
+    def test_gpu_FGP(self):
+        #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy")
+        filename = os.path.join("lena_gray_512.tif")
+
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)                     
+        Im = np.asarray(Im, dtype='float32')
+        Im = Im/255
+        tolerance = 1e-05
+        
+        rms_fgp_exp = 0.019152347 #expected value for FGP model
+        
+        # set parameters for FGP-TV
+        pars_fgp_tv = {'algorithm' : FGP_TV, \
+                            'input' : Im,\
+                            'regularisation_parameter':0.04, \
+                            'number_of_iterations' :50 ,\
+                            'tolerance_constant':1e-06,\
+                            'methodTV': 0 ,\
+                            'nonneg': 0 ,\
+                            'printingOut': 0 
+                            }
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print ("_________testing FGP-TV (2D, GPU)__________")
+        print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        try:
+            fgp_gpu = FGP_TV(pars_fgp_tv['input'], 
+              pars_fgp_tv['regularisation_parameter'],
+              pars_fgp_tv['number_of_iterations'],
+              pars_fgp_tv['tolerance_constant'], 
+              pars_fgp_tv['methodTV'],
+              pars_fgp_tv['nonneg'],
+              pars_fgp_tv['printingOut'],'gpu')  
+        except ValueError as ve:
+            self.skipTest("Results not comparable. GPU computing error.")
+        rms_fgp = rmse(Im, fgp_gpu)
+        # now compare obtained rms with the expected value
+
+        self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance)
+
+if __name__ == '__main__':
+    unittest.main()  # run every test case of this module when executed as a script
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..bdcb8f4
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,19 @@
+#   Copyright 2017 Edoardo Pasca
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+if (BUILD_MATLAB_WRAPPER)
+    add_subdirectory(Matlab)  # process Matlab/CMakeLists.txt only when the Matlab wrapper is requested
+endif()
+if (BUILD_PYTHON_WRAPPER)
+    add_subdirectory(Python)  # process Python/CMakeLists.txt only when the Python wrapper is requested
+endif()
\ No newline at end of file
diff --git a/src/Core/CCPiDefines.h b/src/Core/CCPiDefines.h
new file mode 100644
index 0000000..d3038f9
--- /dev/null
+++ b/src/Core/CCPiDefines.h
@@ -0,0 +1,35 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Srikanth Nagella, Edoardo Pasca, Daniil Kazantsev
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#ifndef CCPIDEFINES_H
+#define CCPIDEFINES_H
+/* CCPI_EXPORT: dllexport while building the library on Windows, dllimport for its users, empty elsewhere. */
+#if defined(_WIN32) || defined(__WIN32__)
+  #if defined(CCPiCore_EXPORTS) || defined(CCPiNexusWidget_EXPORTS) || defined(ContourTreeSegmentation_EXPORTS) || defined(ContourTree_EXPORTS) /* added by the build system */
+    #define  CCPI_EXPORT __declspec(dllexport)
+    #define EXPIMP_TEMPLATE
+  #else
+    #define  CCPI_EXPORT __declspec(dllimport)
+    #define EXPIMP_TEMPLATE extern
+  #endif /* *_EXPORTS */
+#else
+  #define CCPI_EXPORT /* every non-Windows platform: no decoration, so the macro is always defined */
+#endif
+
+#endif /* CCPIDEFINES_H */
diff --git a/src/Core/CMakeLists.txt b/src/Core/CMakeLists.txt
new file mode 100644
index 0000000..b3c0dfb
--- /dev/null
+++ b/src/Core/CMakeLists.txt
@@ -0,0 +1,151 @@
+#   Copyright 2018 Edoardo Pasca
+#cmake_minimum_required (VERSION 3.0)
+
+project(RGL_core)
+#https://stackoverflow.com/questions/13298504/using-cmake-with-setup-py
+
+# The version number is taken from the CIL_VERSION environment variable.
+
+set (CIL_VERSION $ENV{CIL_VERSION} CACHE INTERNAL "Core Imaging Library version" FORCE)
+
+# conda orchestrated build
+message("CIL_VERSION ${CIL_VERSION}")
+#include (GenerateExportHeader)
+
+# Enable OpenMP for the C/C++ sources and for linking when the compiler supports it.
+find_package(OpenMP)
+if (OPENMP_FOUND)
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}")
+    set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}")
+    # CMAKE_STATIC_LINKER_FLAGS is left untouched: it is handed to the archiver,
+    # which does not accept compiler options such as the OpenMP flag.
+endif()
+
+## Build the regularisers package as a library
+message("Creating Regularisers as a shared library")
+
+message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
+message("CMAKE_C_FLAGS ${CMAKE_C_FLAGS}")
+message("CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS}")
+message("CMAKE_SHARED_LINKER_FLAGS ${CMAKE_SHARED_LINKER_FLAGS}")
+message("CMAKE_STATIC_LINKER_FLAGS ${CMAKE_STATIC_LINKER_FLAGS}")
+
+# Default to a Release build, but honour a build type chosen by the caller.
+if (NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE "Release")
+endif()
+
+if(WIN32)
+  set (FLAGS "/DWIN32 /EHsc /DCCPiCore_EXPORTS /openmp")
+  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")
+  # the C flags receive the same additions, on top of the existing C flags
+  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAGS}")
+  set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRT.lib")
+  set (EXTRA_LIBRARIES)
+  message("library lib: ${LIBRARY_LIB}")
+elseif(UNIX)
+  set (FLAGS "-O2 -funsigned-char -Wall  -Wl,--no-undefined  -DCCPiReconstructionIterative_EXPORTS ")
+  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")
+  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAGS}")
+  # libraries passed to target_link_libraries for the CPU library
+  set (EXTRA_LIBRARIES
+    "gomp"
+    "m"
+    )
+endif()
+message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
+
+## Build the regularisers package as a library
+message("Adding regularisers as a shared library")
+
+#set(CMAKE_C_COMPILER /apps/pgi/linux86-64/17.4/bin/pgcc)
+#set(CMAKE_C_FLAGS "-acc -Minfo -ta=tesla:cc20 -openmp")
+#set(CMAKE_C_FLAGS "-acc -Minfo -ta=multicore -openmp -fPIC")
+add_library(cilreg SHARED
+  ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_TV_core.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/SB_TV_core.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TGV_core.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/Diffusion_core.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/Diffus4th_order_core.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/LLT_ROF_core.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/ROF_TV_core.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_dTV_core.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TNV_core.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/Nonlocal_TV_core.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/PatchSelect_core.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/utils.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/inpainters_CPU/Diffusion_Inpaint_core.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/inpainters_CPU/NonlocalMarching_Inpaint_core.c
+)
+target_link_libraries(cilreg ${EXTRA_LIBRARIES})
+# Directory-scoped include paths: they apply to every target created in this directory.
+include_directories(${LIBRARY_INC}/include
+                    ${CMAKE_CURRENT_SOURCE_DIR}
+                    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/
+                    ${CMAKE_CURRENT_SOURCE_DIR}/inpainters_CPU/)
+
+## Install the CPU library; artifact kinds and destinations differ per platform
+
+if(UNIX)
+  message("I'd install into ${CMAKE_INSTALL_PREFIX}/lib")
+  install(TARGETS cilreg
+    LIBRARY DESTINATION lib
+    CONFIGURATIONS ${CMAKE_BUILD_TYPE}
+  )
+elseif(WIN32)
+  message("I'd install into ${CMAKE_INSTALL_PREFIX} lib bin")
+  install(TARGETS cilreg
+    RUNTIME DESTINATION bin
+    ARCHIVE DESTINATION lib
+    CONFIGURATIONS ${CMAKE_BUILD_TYPE}
+  )
+endif()
+
+
+
+# Optional CUDA build of the GPU regularisers, enabled with BUILD_CUDA
+if(BUILD_CUDA)
+  find_package(CUDA)
+  if(CUDA_FOUND)
+    set(CUDA_NVCC_FLAGS "-Xcompiler -fPIC -shared -D_FORCE_INLINES")
+    message("CUDA FLAGS ${CUDA_NVCC_FLAGS}")
+    CUDA_ADD_LIBRARY(cilregcuda SHARED
+      ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_ROF_GPU_core.cu
+      ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_FGP_GPU_core.cu
+      ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_SB_GPU_core.cu
+      ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/LLT_ROF_GPU_core.cu
+      ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TGV_GPU_core.cu
+      ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/dTV_FGP_GPU_core.cu
+      ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/NonlDiff_GPU_core.cu
+      ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/Diffus_4thO_GPU_core.cu
+      ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/PatchSelect_GPU_core.cu
+    )
+    if(UNIX)
+      message("I'd install into ${CMAKE_INSTALL_PREFIX}/lib")
+      install(TARGETS cilregcuda
+        LIBRARY DESTINATION lib
+        CONFIGURATIONS ${CMAKE_BUILD_TYPE}
+      )
+    elseif(WIN32)
+      message("I'd install into ${CMAKE_INSTALL_PREFIX} lib bin")
+      install(TARGETS cilregcuda
+        RUNTIME DESTINATION bin
+        ARCHIVE DESTINATION lib
+        CONFIGURATIONS ${CMAKE_BUILD_TYPE}
+      )
+    endif()
+  else()
+    message("CUDA NOT FOUND")
+  endif()
+endif()
+
+if (BUILD_MATLAB_WRAPPER)  # test the variable itself rather than its expansion
+  if (WIN32)
+    install(TARGETS cilreg DESTINATION ${MATLAB_DEST})
+    if (TARGET cilregcuda)  # only when the CUDA library target was actually created
+      install(TARGETS cilregcuda DESTINATION ${MATLAB_DEST})
+    endif()
+  endif()
+endif()
diff --git a/src/Core/inpainters_CPU/Diffusion_Inpaint_core.c b/src/Core/inpainters_CPU/Diffusion_Inpaint_core.c
new file mode 100644
index 0000000..08b168a
--- /dev/null
+++ b/src/Core/inpainters_CPU/Diffusion_Inpaint_core.c
@@ -0,0 +1,322 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Diffusion_Inpaint_core.h"
+#include "utils.h"
+
+/* sign of x as an int: 1 when x is positive, -1 when negative, 0 otherwise */
+int signNDF_inc(float x) {
+    return (x > 0.0f) ? 1 : ((x < 0.0f) ? -1 : 0);
+}
+
+/* C-OMP implementation of linear and nonlinear diffusion [1,2] for inpainting task (2D/3D case)
+ * The minimisation is performed using explicit scheme. 
+ *
+ * Input Parameters:
+ * 1. Image/volume to inpaint
+ * 2. Mask of the same size as (1) in 'unsigned char' format  (ones mark the region to inpaint, zeros belong to the data)
+ * 3. lambda - regularization parameter
+ * 4. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
+ * 5. Number of iterations, for explicit scheme >= 150 is recommended 
+ * 6. tau - time-marching step for explicit scheme
+ * 7. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
+ *
+ * Output:
+ * [1] Inpainted image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+float Diffusion_Inpaint_CPU_main(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ)
+{
+    long i, pointsone, DimTotal;
+    float sigmaPar2;
+    sigmaPar2 = sigmaPar/sqrt(2.0f); /* rescaled edge parameter handed to the nonlinear kernels */
+    /* widen each factor before multiplying so the element count is not computed in int arithmetic */
+    DimTotal = (long)(dimX)*(long)(dimY)*(long)(dimZ);
+    /* copy into output */
+    copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
+    pointsone = 0; /* counted with the same test the kernels use (Mask > 0) so both always agree */
+    for (i=0; i<DimTotal; i++) if (Mask[i] > 0) pointsone++;
+
+    if (pointsone == 0) printf("%s \n", "Nothing to inpaint, zero mask!");
+    else {
+
+        if (dimZ == 1) {
+            /* running 2D diffusion iterations */
+            for(i=0; i < iterationsNumb; i++) {
+                if (sigmaPar == 0.0f) LinearDiff_Inp_2D(Input, Mask, Output, lambdaPar, tau, (long)(dimX), (long)(dimY)); /* linear diffusion (heat equation) */
+                else NonLinearDiff_Inp_2D(Input, Mask, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY)); /* nonlinear diffusion */
+            }
+        }
+        else {
+            /* running 3D diffusion iterations */
+            for(i=0; i < iterationsNumb; i++) {
+                if (sigmaPar == 0.0f) LinearDiff_Inp_3D(Input, Mask, Output, lambdaPar, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
+                else NonLinearDiff_Inp_3D(Input, Mask, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY), (long)(dimZ));
+            }
+        }
+    }
+    return *Output; /* value of the first output element; the result itself is written to Output */
+}
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+/* linear diffusion (heat equation): one explicit in-place sweep over the masked (Mask > 0) pixels of Output; returns Output[0] */
+float LinearDiff_Inp_2D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float tau, long dimX, long dimY)
+{
+    long i,j,i1,i2,j1,j2,index;
+    float e,w,n,s,e1,w1,n1,s1;
+    /* NOTE(review): Output is read (neighbours) and written in place by concurrent threads - confirm this is intended */
+#pragma omp parallel for shared(Input,Mask) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neumann); a dimension of size 1 falls back to the pixel itself */
+        i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+        i2 = i-1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann); a dimension of size 1 falls back to the pixel itself */
+            j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+            j2 = j-1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+            index = j*dimX+i;
+
+            if (Mask[index] > 0) {
+                /* inpainting process: only masked pixels are updated */
+                e = Output[j*dimX+i1];
+                w = Output[j*dimX+i2];
+                n = Output[j1*dimX+i];
+                s = Output[j2*dimX+i];
+
+                e1 = e - Output[index];
+                w1 = w - Output[index];
+                n1 = n - Output[index];
+                s1 = s - Output[index];
+
+                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
+            }
+        }}
+    return *Output;
+}
+
+/* nonlinear diffusion: one explicit in-place sweep over the masked pixels of Output (penaltytype 1 Huber, 2 Perona-Malik, 3 Tukey); returns Output[0] */
+float NonLinearDiff_Inp_2D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY)
+{
+    long i,j,i1,i2,j1,j2,index;
+    float e,w,n,s,e1,w1,n1,s1;
+    /* NOTE(review): Output is read (neighbours) and written in place by concurrent threads - confirm this is intended */
+#pragma omp parallel for shared(Input,Mask) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neumann); a dimension of size 1 falls back to the pixel itself */
+        i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+        i2 = i-1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann); a dimension of size 1 falls back to the pixel itself */
+            j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+            j2 = j-1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+            index = j*dimX+i;
+
+            if (Mask[index] > 0) {
+                /* inpainting process: only masked pixels are updated */
+                e = Output[j*dimX+i1];
+                w = Output[j*dimX+i2];
+                n = Output[j1*dimX+i];
+                s = Output[j2*dimX+i];
+
+                e1 = e - Output[index];
+                w1 = w - Output[index];
+                n1 = n - Output[index];
+                s1 = s - Output[index];
+
+                if (penaltytype == 1){
+                    /* Huber penalty */
+                    if (fabs(e1) > sigmaPar) e1 =  signNDF_inc(e1);
+                    else e1 = e1/sigmaPar;
+
+                    if (fabs(w1) > sigmaPar) w1 =  signNDF_inc(w1);
+                    else w1 = w1/sigmaPar;
+
+                    if (fabs(n1) > sigmaPar) n1 =  signNDF_inc(n1);
+                    else n1 = n1/sigmaPar;
+
+                    if (fabs(s1) > sigmaPar) s1 =  signNDF_inc(s1);
+                    else s1 = s1/sigmaPar;
+                }
+                else if (penaltytype == 2) {
+                    /* Perona-Malik */
+                    e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
+                    w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
+                    n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
+                    s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
+                }
+                else if (penaltytype == 3) {
+                    /* Tukey Biweight */
+                    if (fabs(e1) <= sigmaPar) e1 =  e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
+                    else e1 = 0.0f;
+                    if (fabs(w1) <= sigmaPar) w1 =  w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
+                    else w1 = 0.0f;
+                    if (fabs(n1) <= sigmaPar) n1 =  n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
+                    else n1 = 0.0f;
+                    if (fabs(s1) <= sigmaPar) s1 =  s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
+                    else s1 = 0.0f;
+                }
+                else {
+                    printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+                    break; /* leaves the inner j loop only; the outer i loop continues */
+                }
+                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
+            }
+        }}
+    return *Output;
+}
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/* linear diffusion (heat equation): one explicit in-place sweep over the masked (Mask > 0) voxels of Output; returns Output[0] */
+float LinearDiff_Inp_3D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float tau, long dimX, long dimY, long dimZ)
+{
+    long i,j,k,i1,i2,j1,j2,k1,k2,index;
+    float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
+    /* NOTE(review): Output is read (neighbours) and written in place by concurrent threads - confirm this is intended */
+#pragma omp parallel for shared(Input,Mask) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1,k,k1,k2,u1,d1,u,d)
+for(k=0; k<dimZ; k++) {
+    k1 = k+1; if (k1 == dimZ) k1 = (k > 0) ? k-1 : k; /* mirrored slice; size-1 axis falls back to the slice itself */
+    k2 = k-1; if (k2 < 0) k2 = (dimZ > 1) ? k+1 : k;
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neumann); a dimension of size 1 falls back to the voxel itself */
+        i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+        i2 = i-1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann); a dimension of size 1 falls back to the voxel itself */
+            j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+            j2 = j-1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+            index = (dimX*dimY)*k + j*dimX+i;
+
+            if (Mask[index] > 0) {
+                /* inpainting process: only masked voxels are updated */
+
+                e = Output[(dimX*dimY)*k + j*dimX+i1];
+                w = Output[(dimX*dimY)*k + j*dimX+i2];
+                n = Output[(dimX*dimY)*k + j1*dimX+i];
+                s = Output[(dimX*dimY)*k + j2*dimX+i];
+                u = Output[(dimX*dimY)*k1 + j*dimX+i];
+                d = Output[(dimX*dimY)*k2 + j*dimX+i];
+
+                e1 = e - Output[index];
+                w1 = w - Output[index];
+                n1 = n - Output[index];
+                s1 = s - Output[index];
+                u1 = u - Output[index];
+                d1 = d - Output[index];
+
+                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));
+            }
+        }}}
+    return *Output;
+}
+
+float NonLinearDiff_Inp_3D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY, long dimZ)
+{
+    long i,j,k,i1,i2,j1,j2,k1,k2,index;
+    float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
+    /* nonlinear diffusion: one explicit in-place sweep over the masked voxels of Output (penaltytype 1 Huber, 2 Perona-Malik, 3 Tukey) */
+#pragma omp parallel for shared(Input,Mask) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1,k,k1,k2,u1,d1,u,d)
+for(k=0; k<dimZ; k++) {
+    k1 = k+1; if (k1 == dimZ) k1 = (k > 0) ? k-1 : k; /* mirrored slice; size-1 axis falls back to the slice itself */
+    k2 = k-1; if (k2 < 0) k2 = (dimZ > 1) ? k+1 : k;
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neumann); a dimension of size 1 falls back to the voxel itself */
+        i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+        i2 = i-1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann); a dimension of size 1 falls back to the voxel itself */
+            j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+            j2 = j-1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+            index = (dimX*dimY)*k + j*dimX+i;
+
+            if (Mask[index] > 0) {
+                /* inpainting process: only masked voxels are updated */
+                e = Output[(dimX*dimY)*k + j*dimX+i1];
+                w = Output[(dimX*dimY)*k + j*dimX+i2];
+                n = Output[(dimX*dimY)*k + j1*dimX+i];
+                s = Output[(dimX*dimY)*k + j2*dimX+i];
+                u = Output[(dimX*dimY)*k1 + j*dimX+i];
+                d = Output[(dimX*dimY)*k2 + j*dimX+i];
+
+                e1 = e - Output[index];
+                w1 = w - Output[index];
+                n1 = n - Output[index];
+                s1 = s - Output[index];
+                u1 = u - Output[index];
+                d1 = d - Output[index];
+
+                if (penaltytype == 1){
+                    /* Huber penalty */
+                    if (fabs(e1) > sigmaPar) e1 =  signNDF_inc(e1);
+                    else e1 = e1/sigmaPar;
+
+                    if (fabs(w1) > sigmaPar) w1 =  signNDF_inc(w1);
+                    else w1 = w1/sigmaPar;
+
+                    if (fabs(n1) > sigmaPar) n1 =  signNDF_inc(n1);
+                    else n1 = n1/sigmaPar;
+
+                    if (fabs(s1) > sigmaPar) s1 =  signNDF_inc(s1);
+                    else s1 = s1/sigmaPar;
+
+                    if (fabs(u1) > sigmaPar) u1 =  signNDF_inc(u1);
+                    else u1 = u1/sigmaPar;
+
+                    if (fabs(d1) > sigmaPar) d1 =  signNDF_inc(d1);
+                    else d1 = d1/sigmaPar;
+                }
+                else if (penaltytype == 2) {
+                    /* Perona-Malik */
+                    e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
+                    w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
+                    n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
+                    s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
+                    u1 = (u1)/(1.0f + powf((u1/sigmaPar),2));
+                    d1 = (d1)/(1.0f + powf((d1/sigmaPar),2));
+                }
+                else if (penaltytype == 3) {
+                    /* Tukey Biweight */
+                    if (fabs(e1) <= sigmaPar) e1 =  e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
+                    else e1 = 0.0f;
+                    if (fabs(w1) <= sigmaPar) w1 =  w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
+                    else w1 = 0.0f;
+                    if (fabs(n1) <= sigmaPar) n1 =  n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
+                    else n1 = 0.0f;
+                    if (fabs(s1) <= sigmaPar) s1 =  s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
+                    else s1 = 0.0f;
+                    if (fabs(u1) <= sigmaPar) u1 =  u1*powf((1.0f - powf((u1/sigmaPar),2)), 2);
+                    else u1 = 0.0f;
+                    if (fabs(d1) <= sigmaPar) d1 =  d1*powf((1.0f - powf((d1/sigmaPar),2)), 2);
+                    else d1 = 0.0f;
+                }
+                else {
+                    printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+                    break; /* leaves the inner j loop only; the outer loops continue */
+                }
+
+                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));
+            }
+        }}}
+    return *Output;
+}
diff --git a/src/Core/inpainters_CPU/Diffusion_Inpaint_core.h b/src/Core/inpainters_CPU/Diffusion_Inpaint_core.h
new file mode 100644
index 0000000..a96fe79
--- /dev/null
+++ b/src/Core/inpainters_CPU/Diffusion_Inpaint_core.h
@@ -0,0 +1,61 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+
+/* C-OMP implementation of linear and nonlinear diffusion [1,2] for inpainting task (2D/3D case)
+ * The minimisation is performed using explicit scheme. 
+ *
+ * Input Parameters:
+ * 1. Image/volume to inpaint
+ * 2. Mask of the same size as (1) in 'unsigned char' format  (ones mark the region to inpaint, zeros belong to the data)
+ * 3. lambda - regularization parameter
+ * 4. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
+ * 5. Number of iterations, for explicit scheme >= 150 is recommended 
+ * 6. tau - time-marching step for explicit scheme
+ * 7. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
+ *
+ * Output:
+ * [1] Inpainted image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+ 
+#ifdef __cplusplus
+extern "C" { /* give the declarations below C linkage when this header is included from C++ */
+#endif
+CCPI_EXPORT float Diffusion_Inpaint_CPU_main(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb,  float tau, int penaltytype, int dimX, int dimY, int dimZ); /* entry point: runs iterationsNumb sweeps of a 2D or 3D kernel */
+/* single-sweep kernels; each updates the masked elements of Output in place and returns Output[0] */
+CCPI_EXPORT float LinearDiff_Inp_2D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float tau, long dimX, long dimY);
+CCPI_EXPORT float NonLinearDiff_Inp_2D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY);
+CCPI_EXPORT float LinearDiff_Inp_3D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float tau, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float NonLinearDiff_Inp_3D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY, long dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c b/src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c
new file mode 100644
index 0000000..b488ca4
--- /dev/null
+++ b/src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c
@@ -0,0 +1,188 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NonlocalMarching_Inpaint_core.h"
+#include "utils.h"
+
+
+/* C-OMP implementation of Nonlocal Vertical Marching inpainting method (2D case)
+ * The method is heuristic but computationally efficient (especially for larger images).
+ * It was developed specifically to smoothly inpaint horizontal or inclined missing data regions in sinograms.
+ * The method will NOT work satisfactorily if you have lengthy vertical stripes of missing data.
+ *
+ * Input:
+ * 1. 2D image or sinogram with horizontal or inclined regions of missing data
+ * 2. Mask of the same size as (1) in 'unsigned char' format  (ones mark the region to inpaint, zeros belong to the data)
+ * 3. Linear increment to increase searching window size in iterations, values from 1-3 is a good choice
+ *
+ * Output:
+ * 1. Inpainted image or a sinogram
+ * 2. updated mask
+ *
+ * Reference: D. Kazantsev (paper in preparation)
+ */
+
+float NonlocalMarching_Inpaint_main(float *Input, unsigned char *M, float *Output, unsigned char *M_upd, int SW_increment, int iterationsNumb, int trigger, int dimX, int dimY, int dimZ)
+{
+    int i, j, i_m, j_m, counter, iter, iterations_number, W_fullsize, switchmask, switchcurr, counterElements;
+    float *Gauss_weights;
+    /* only one dimX*dimY slice is processed; dimZ is not used by this function */
+    /* copying M to M_upd */
+    copyIm_unchar(M, M_upd, dimX, dimY, 1);
+
+    /* Copying the image */
+    copyIm(Input, Output, dimX, dimY, 1);
+
+    /* Find how many inpainting iterations (equal to the number of ones) required based on a mask  */
+    if (iterationsNumb == 0) {
+        iterations_number = 0;
+        for (i=0; i<dimY*dimX; i++) {
+            if (M[i] == 1) iterations_number++;
+        }
+        if ((int)(iterations_number/dimY) > dimX) iterations_number = dimX;
+    }
+    else iterations_number = iterationsNumb;
+
+    if (iterations_number == 0) printf("%s \n", "Nothing to inpaint, zero mask!");
+    else {
+
+        printf("%s %i \n", "Max iteration number equals to:", iterations_number);
+
+        /* Inpainting iterations run here*/
+        int W_halfsize = 1;
+        for(iter=0; iter < iterations_number; iter++) {
+
+            /* pre-calculation of Gaussian distance weights  */
+            W_fullsize = (int)(2*W_halfsize + 1); /*full size of similarity window */
+            Gauss_weights = (float*)calloc(W_fullsize*W_fullsize,sizeof(float ));
+            /* without the weights table nothing can be inpainted: stop instead of dereferencing NULL */
+            if (Gauss_weights == NULL) { printf("%s \n", "Memory allocation for Gaussian weights failed!"); break; }
+
+            counter = 0;
+            for(i_m=-W_halfsize; i_m<=W_halfsize; i_m++) {
+                for(j_m=-W_halfsize; j_m<=W_halfsize; j_m++) {
+                    Gauss_weights[counter] = exp(-(pow((i_m), 2) + pow((j_m), 2))/(2*W_halfsize*W_halfsize));
+                    counter++;
+                }
+            }
+
+            if (trigger == 0) {
+                /*Matlab*/
+#pragma omp parallel for shared(Output, M_upd, Gauss_weights) private(i, j, switchmask, switchcurr)
+                for(j=0; j<dimY; j++) {
+                    switchmask = 0;
+                    for(i=0; i<dimX; i++) {
+                        switchcurr = 0;
+                        if ((M_upd[j*dimX + i] == 1) && (switchmask == 0)) {
+                            /* perform inpainting of the current pixel */
+                            inpaint_func(Output, M_upd, Gauss_weights, i, j, dimX, dimY, W_halfsize, W_fullsize);
+                            /* add value to the mask*/
+                            M_upd[j*dimX + i] = 0;
+                            switchmask = 1; switchcurr = 1;
+                        }
+                        if ((M_upd[j*dimX + i] == 0) && (switchmask == 1) && (switchcurr == 0)) {
+                            /* perform inpainting of the previous (i-1) pixel */
+                            inpaint_func(Output, M_upd, Gauss_weights, i-1, j, dimX, dimY, W_halfsize, W_fullsize);
+                            /* add value to the mask*/
+                            M_upd[(j)*dimX + i-1] = 0;
+                            switchmask = 0;
+                        }
+                    }
+                }
+            }
+            else {
+                /*Python*/
+                /* find a point in the mask to inpaint */
+#pragma omp parallel for shared(Output, M_upd, Gauss_weights) private(i, j, switchmask, switchcurr)
+                for(i=0; i<dimX; i++) {
+                    switchmask = 0;
+                    for(j=0; j<dimY; j++) {
+                        switchcurr = 0;
+                        if ((M_upd[j*dimX + i] == 1) && (switchmask == 0)) {
+                            /* perform inpainting of the current pixel */
+                            inpaint_func(Output, M_upd, Gauss_weights, i, j, dimX, dimY, W_halfsize, W_fullsize);
+                            /* add value to the mask*/
+                            M_upd[j*dimX + i] = 0;
+                            switchmask = 1; switchcurr = 1;
+                        }
+                        if ((M_upd[j*dimX + i] == 0) && (switchmask == 1) && (switchcurr == 0)) {
+                            /* perform inpainting of the previous (j-1) pixel */
+                            inpaint_func(Output, M_upd, Gauss_weights, i, j-1, dimX, dimY, W_halfsize, W_fullsize);
+                            /* add value to the mask*/
+                            M_upd[(j-1)*dimX + i] = 0;
+                            switchmask = 0;
+                        }
+                    }
+                }
+            }
+            free(Gauss_weights);
+
+            /* check if possible to terminate iterations earlier */
+            counterElements = 0;
+            for(i=0; i<dimX*dimY; i++) if (M_upd[i] == 0) counterElements++;
+
+            if (counterElements == dimX*dimY) {
+                printf("%s \n", "Padding completed!");
+                break;
+            }
+            W_halfsize += SW_increment; /* grow the search window for the next iteration */
+        }
+        printf("%s %i \n", "Iterations stopped at:", iter);
+    }
+    return *Output;
+}
+
+float inpaint_func(float *U, unsigned char *M_upd, float *Gauss_weights, int i, int j, int dimX, int dimY, int W_halfsize, int W_fullsize)
+{
+    /* fills pixel (i,j) of U with a weighted average of the known (mask == 0) pixels in the surrounding window */
+    int di, dj, col, row, slot;
+    float total_weight = 0.0f;
+    float estimate = 0.0f;
+
+    /* pass 1: accumulate the weights of the known pixels that lie inside the image */
+    slot = 0;
+    for (di = -W_halfsize; di <= W_halfsize; di++) {
+        col = i + di;
+        for (dj = -W_halfsize; dj <= W_halfsize; dj++, slot++) {
+            row = j + dj;
+            if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY)) continue; /* outside the image */
+            if (M_upd[row*dimX + col] == 0) total_weight += Gauss_weights[slot];
+        }
+    }
+
+    /* pass 2: normalised weighted sum of the known pixels; skipped when no weight was collected */
+    if (total_weight != 0.0f) {
+        slot = 0;
+        for (di = -W_halfsize; di <= W_halfsize; di++) {
+            col = i + di;
+            for (dj = -W_halfsize; dj <= W_halfsize; dj++, slot++) {
+                row = j + dj;
+                if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY)) continue; /* outside the image */
+                if (M_upd[row*dimX + col] == 0) {
+                    estimate += (Gauss_weights[slot]/total_weight)*U[row*dimX + col];
+                }
+            }
+        }
+    }
+
+    /* the target pixel receives the estimate (0 when the window holds no known pixel) */
+    U[j*dimX + i] = estimate;
+    return *U;
+}
+
diff --git a/src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.h b/src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.h
new file mode 100644
index 0000000..0f99ed4
--- /dev/null
+++ b/src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.h
@@ -0,0 +1,54 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+
+/* C-OMP implementation of Nonlocal Vertical Marching inpainting method (2D case)
+ * The method is heuristic but computationally efficient (especially for larger images).
+ * It was developed specifically to smoothly inpaint horizontal or inclined missing data regions in sinograms.
+ * The method will NOT work satisfactorily if you have lengthy vertical stripes of missing data.
+ *
+ * Inputs:
+ * 1. 2D image or sinogram with horizontal or inclined regions of missing data
+ * 2. Mask of the same size as (1) in 'unsigned char' format  (ones mark the region to inpaint, zeros belong to the data)
+ * 3. Linear increment to increase searching window size in iterations, values from 1-3 is a good choice
+
+ * Output:
+ * 1. Inpainted image or a sinogram
+ * 2. updated mask
+ *
+ * Reference: TBA
+ */
+
+ 
+#ifdef __cplusplus
+extern "C" { /* give the declarations below C linkage when this header is included from C++ */
+#endif
+CCPI_EXPORT float NonlocalMarching_Inpaint_main(float *Input, unsigned char *M, float *Output, unsigned char *M_upd, int SW_increment, int iterationsNumb, int trigger, int dimX, int dimY, int dimZ); /* entry point: writes the inpainted image to Output and the updated mask to M_upd */
+CCPI_EXPORT float inpaint_func(float *U, unsigned char *M_upd, float *Gauss_weights, int i, int j, int dimX, int dimY, int W_halfsize, int W_fullsize); /* fills the single pixel (i,j) of U from its unmasked window neighbours */
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/Diffus4th_order_core.c b/src/Core/regularisers_CPU/Diffus4th_order_core.c
new file mode 100644
index 0000000..01f4f64
--- /dev/null
+++ b/src/Core/regularisers_CPU/Diffus4th_order_core.c
@@ -0,0 +1,250 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Diffus4th_order_core.h"
+#include "utils.h"
+
+#define EPS 1.0e-7
+
+/* C-OMP implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
+ * The minimisation is performed using explicit scheme. 
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume 
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma)
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended 
+ * 5. tau - time-marching step for the explicit scheme
+ *
+ * Output:
+ * [1] Regularized image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
+ */
+
+float Diffus4th_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ)
+{
+    /* Copies Input into Output, then refines Output in place with iterationsNumb explicit steps; returns Output[0]. */
+    int i;
+    float *W_Lapl = NULL;
+    float sigmaPar2 = sigmaPar*sigmaPar; /* handed to the Weighted_Laplc* kernels as their sigma argument */
+    size_t DimTotal = (size_t)dimX*(size_t)dimY*(size_t)dimZ; /* widened first: the plain int product can overflow */
+    /* copy into output first, so Output is still a plain copy of Input if the allocation below fails */
+    copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
+    W_Lapl = calloc(DimTotal, sizeof(float)); /* scratch buffer for the weighted Laplacian, one float per element */
+    if ((DimTotal != 0) && (W_Lapl == NULL)) {
+        printf("%s \n", "Diffus4th_CPU_main: failed to allocate the work buffer");
+        return *Output;
+    }
+    if (dimZ == 1) {
+        /* running 2D diffusion iterations */
+        for(i=0; i < iterationsNumb; i++) {
+            /* Calculating weighted Laplacian */
+            Weighted_Laplc2D(W_Lapl, Output, sigmaPar2, dimX, dimY);
+            /* Perform iteration step */
+            Diffusion_update_step2D(Output, Input, W_Lapl, lambdaPar, sigmaPar2, tau, (long)(dimX), (long)(dimY));
+        }
+    } else {
+        /* running 3D diffusion iterations */
+        for(i=0; i < iterationsNumb; i++) {
+            /* Calculating weighted Laplacian */
+            Weighted_Laplc3D(W_Lapl, Output, sigmaPar2, dimX, dimY, dimZ);
+            /* Perform iteration step */
+            Diffusion_update_step3D(Output, Input, W_Lapl, lambdaPar, sigmaPar2, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
+        }
+    }
+    free(W_Lapl);
+    return *Output;
+}
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+float Weighted_Laplc2D(float *W_Lapl, float *U0, float sigma, long dimX, long dimY)
+{
+    /* Writes the edge-weighted Laplacian of U0 into W_Lapl (both indexed as j*dimX+i) and returns W_Lapl[0].
+     * sigma is used as given in c = 1/(1 + denom/sigma); Diffus4th_CPU_main passes its sigma parameter squared. */
+    long i,j,i1,i2,j1,j2,index;
+    float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq;
+    #pragma omp parallel for shared(W_Lapl) private(i,j,i1,i2,j1,j2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions; if dimX == 1 both neighbours fall back to i so no index leaves the array */
+        i1 = (i+1 < dimX) ? i+1 : ((i > 0) ? i-1 : i);
+        i2 = (i > 0) ? i-1 : ((i+1 < dimX) ? i+1 : i);
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions, with the same fallback if dimY == 1 */
+            j1 = (j+1 < dimY) ? j+1 : ((j > 0) ? j-1 : j);
+            j2 = (j > 0) ? j-1 : ((j+1 < dimY) ? j+1 : j);
+            index = j*dimX+i;
+
+            /* halved differences between opposite neighbours, and their squares */
+            gradX = 0.5f*(U0[j*dimX+i2] - U0[j*dimX+i1]);
+            gradX_sq = pow(gradX,2);
+            gradY = 0.5f*(U0[j2*dimX+i] - U0[j1*dimX+i]);
+            gradY_sq = pow(gradY,2);
+
+            /* second differences along x and y, and the mixed one from the four diagonal neighbours */
+            gradXX = U0[j*dimX+i2] + U0[j*dimX+i1] - 2*U0[index];
+            gradYY = U0[j2*dimX+i] + U0[j1*dimX+i] - 2*U0[index];
+            gradXY = 0.25f*(U0[j2*dimX+i2] + U0[j1*dimX+i1] - U0[j1*dimX+i2] - U0[j2*dimX+i1]);
+            xy_2 = 2.0f*gradX*gradY*gradXY;
+
+            denom =  gradX_sq + gradY_sq;
+
+            /* where the squared gradient magnitude is at most EPS, divide by EPS instead of by denom */
+            if (denom <= EPS) {
+                V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/EPS;
+                V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/EPS;
+            }
+            else  {
+                V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/denom;
+                V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/denom;
+            }
+            c = 1.0f/(1.0f + denom/sigma);
+            c_sq = c*c;
+
+            W_Lapl[index] = c_sq*V_norm + c*V_orth;
+        }
+    }
+    return *W_Lapl;
+}
+
+float Diffusion_update_step2D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY)
+{
+    /* One explicit step: Output += tau*(-lambdaPar*(second differences of W_Lapl) - (Output - Input)); returns Output[0]. sigmaPar2 is unused. */
+    long i,j,i1,i2,j1,j2,index;
+    float gradXXc, gradYYc;
+    #pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,index,gradXXc,gradYYc)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions; if dimX == 1 both neighbours fall back to i so no index leaves the array */
+        i1 = (i+1 < dimX) ? i+1 : ((i > 0) ? i-1 : i);
+        i2 = (i > 0) ? i-1 : ((i+1 < dimX) ? i+1 : i);
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions, with the same fallback if dimY == 1 */
+            j1 = (j+1 < dimY) ? j+1 : ((j > 0) ? j-1 : j);
+            j2 = (j > 0) ? j-1 : ((j+1 < dimY) ? j+1 : j);
+            index = j*dimX+i;
+            /* second differences of the weighted Laplacian along x and y */
+            gradXXc = W_Lapl[j*dimX+i2] + W_Lapl[j*dimX+i1] - 2*W_Lapl[index];
+            gradYYc = W_Lapl[j2*dimX+i] + W_Lapl[j1*dimX+i] - 2*W_Lapl[index];
+
+            Output[index] += tau*(-lambdaPar*(gradXXc + gradYYc) - (Output[index] - Input[index]));
+        }
+    }
+    return *Output;
+}
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+float Weighted_Laplc3D(float *W_Lapl, float *U0, float sigma, long dimX, long dimY, long dimZ)
+{
+    /* 3D counterpart of Weighted_Laplc2D: writes the edge-weighted Laplacian of U0 into W_Lapl
+     * (both indexed as (dimX*dimY)*k + j*dimX + i) and returns W_Lapl[0]. */
+    long i,j,k,i1,i2,j1,j2,k1,k2,index;
+    float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2;
+    #pragma omp parallel for shared(W_Lapl) private(i,j,k,i1,i2,j1,j2,k1,k2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions; if dimX == 1 both neighbours fall back to i so no index leaves the array */
+        i1 = (i+1 < dimX) ? i+1 : ((i > 0) ? i-1 : i);
+        i2 = (i > 0) ? i-1 : ((i+1 < dimX) ? i+1 : i);
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions, with the same fallback if dimY == 1 */
+            j1 = (j+1 < dimY) ? j+1 : ((j > 0) ? j-1 : j);
+            j2 = (j > 0) ? j-1 : ((j+1 < dimY) ? j+1 : j);
+            for(k=0; k<dimZ; k++) {
+                /* symmetric boundary conditions, with the same fallback if dimZ == 1 */
+                k1 = (k+1 < dimZ) ? k+1 : ((k > 0) ? k-1 : k);
+                k2 = (k > 0) ? k-1 : ((k+1 < dimZ) ? k+1 : k);
+                index = (dimX*dimY)*k + j*dimX+i;
+
+                /* halved differences between opposite neighbours, and their squares */
+                gradX = 0.5f*(U0[(dimX*dimY)*k + j*dimX+i2] - U0[(dimX*dimY)*k + j*dimX+i1]);
+                gradX_sq = pow(gradX,2);
+                gradY = 0.5f*(U0[(dimX*dimY)*k + j2*dimX+i] - U0[(dimX*dimY)*k + j1*dimX+i]);
+                gradY_sq = pow(gradY,2);
+                gradZ = 0.5f*(U0[(dimX*dimY)*k2 + j*dimX+i] - U0[(dimX*dimY)*k1 + j*dimX+i]);
+                gradZ_sq = pow(gradZ,2);
+
+                /* second differences along each axis */
+                gradXX = U0[(dimX*dimY)*k + j*dimX+i2] + U0[(dimX*dimY)*k + j*dimX+i1] - 2*U0[index];
+                gradYY = U0[(dimX*dimY)*k + j2*dimX+i] + U0[(dimX*dimY)*k + j1*dimX+i] - 2*U0[index];
+                gradZZ = U0[(dimX*dimY)*k2 + j*dimX+i] + U0[(dimX*dimY)*k1 + j*dimX+i] - 2*U0[index];
+                /* mixed second differences from the four diagonal neighbours in each coordinate plane */
+                gradXY = 0.25f*(U0[(dimX*dimY)*k + j2*dimX+i2] + U0[(dimX*dimY)*k + j1*dimX+i1] - U0[(dimX*dimY)*k + j1*dimX+i2] - U0[(dimX*dimY)*k + j2*dimX+i1]);
+                gradXZ = 0.25f*(U0[(dimX*dimY)*k2 + j*dimX+i2] - U0[(dimX*dimY)*k2+j*dimX+i1] - U0[(dimX*dimY)*k1+j*dimX+i2] + U0[(dimX*dimY)*k1+j*dimX+i1]);
+                gradYZ = 0.25f*(U0[(dimX*dimY)*k2 +j2*dimX+i] - U0[(dimX*dimY)*k2+j1*dimX+i] - U0[(dimX*dimY)*k1+j2*dimX+i] + U0[(dimX*dimY)*k1+j1*dimX+i]);
+
+                xy_2  = 2.0f*gradX*gradY*gradXY;
+                xyz_1 = 2.0f*gradX*gradZ*gradXZ;
+                xyz_2 = 2.0f*gradY*gradZ*gradYZ;
+
+                denom =  gradX_sq + gradY_sq + gradZ_sq;
+
+                /* where the squared gradient magnitude is at most EPS, divide by EPS instead of by denom */
+                if (denom <= EPS) {
+                    V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/EPS;
+                    V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/EPS;
+                }
+                else  {
+                    V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/denom;
+                    V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/denom;
+                }
+
+                c = 1.0f/(1.0f + denom/sigma);
+                c_sq = c*c;
+
+                W_Lapl[index] = c_sq*V_norm + c*V_orth;
+            }
+        }
+    }
+    return *W_Lapl;
+}
+
+float Diffusion_update_step3D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY, long dimZ)
+{
+    /* One explicit step: Output += tau*(-lambdaPar*(second differences of W_Lapl) - (Output - Input)); returns Output[0]. sigmaPar2 is unused. */
+    long i,j,i1,i2,j1,j2,index,k,k1,k2;
+    float gradXXc, gradYYc, gradZZc;
+
+    #pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,k,k1,k2,index,gradXXc,gradYYc,gradZZc)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions; if dimX == 1 both neighbours fall back to i so no index leaves the array */
+        i1 = (i+1 < dimX) ? i+1 : ((i > 0) ? i-1 : i);
+        i2 = (i > 0) ? i-1 : ((i+1 < dimX) ? i+1 : i);
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions, with the same fallback if dimY == 1 */
+            j1 = (j+1 < dimY) ? j+1 : ((j > 0) ? j-1 : j);
+            j2 = (j > 0) ? j-1 : ((j+1 < dimY) ? j+1 : j);
+            for(k=0; k<dimZ; k++) {
+                /* symmetric boundary conditions, with the same fallback if dimZ == 1 */
+                k1 = (k+1 < dimZ) ? k+1 : ((k > 0) ? k-1 : k);
+                k2 = (k > 0) ? k-1 : ((k+1 < dimZ) ? k+1 : k);
+                index = (dimX*dimY)*k + j*dimX+i;
+
+                /* second differences of the weighted Laplacian along x, y and z */
+                gradXXc = W_Lapl[(dimX*dimY)*k + j*dimX+i2] + W_Lapl[(dimX*dimY)*k + j*dimX+i1] - 2*W_Lapl[index];
+                gradYYc = W_Lapl[(dimX*dimY)*k + j2*dimX+i] + W_Lapl[(dimX*dimY)*k + j1*dimX+i] - 2*W_Lapl[index];
+                gradZZc = W_Lapl[(dimX*dimY)*k2 + j*dimX+i] + W_Lapl[(dimX*dimY)*k1 + j*dimX+i] - 2*W_Lapl[index];
+
+                Output[index] += tau*(-lambdaPar*(gradXXc + gradYYc + gradZZc) - (Output[index] - Input[index]));
+            }
+        }
+    }
+    return *Output;
+}
diff --git a/src/Core/regularisers_CPU/Diffus4th_order_core.h b/src/Core/regularisers_CPU/Diffus4th_order_core.h
new file mode 100644
index 0000000..d81afcb
--- /dev/null
+++ b/src/Core/regularisers_CPU/Diffus4th_order_core.h
@@ -0,0 +1,55 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* C-OMP implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
+ * The minimisation is performed using explicit scheme. 
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume 
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma)
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended 
+ * 5. tau - time-marching step for explicit scheme
+ *
+ * Output:
+ * [1] Regularized image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
+ */
+ 
+#ifdef __cplusplus /* give the declarations below C linkage when this header is compiled as C++ */
+extern "C" {
+#endif
+CCPI_EXPORT float Diffus4th_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ); /* entry point; takes the 2D path when dimZ == 1 */
+CCPI_EXPORT float Weighted_Laplc2D(float *W_Lapl, float *U0, float sigma, long dimX, long dimY); /* fills W_Lapl from U0 */
+CCPI_EXPORT float Diffusion_update_step2D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY); /* updates Output in place */
+CCPI_EXPORT float Weighted_Laplc3D(float *W_Lapl, float *U0, float sigma, long dimX, long dimY, long dimZ); /* fills W_Lapl from U0 */
+CCPI_EXPORT float Diffusion_update_step3D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY, long dimZ); /* updates Output in place */
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/src/Core/regularisers_CPU/Diffusion_core.c b/src/Core/regularisers_CPU/Diffusion_core.c
new file mode 100644
index 0000000..b765796
--- /dev/null
+++ b/src/Core/regularisers_CPU/Diffusion_core.c
@@ -0,0 +1,307 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Diffusion_core.h"
+#include "utils.h"
+
+#define EPS 1.0e-5
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+/* Sign of x: 1 for positive, -1 for negative, 0 otherwise (zero, or NaN since both comparisons are false). */
+int signNDFc(float x) {
+    return (x > 0) ? 1 : ((x < 0) ? -1 : 0);
+}
+
+/* C-OMP implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
+ * The minimisation is performed using explicit scheme. 
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume 
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma); when sigma equals zero, linear diffusion is used instead of nonlinear diffusion
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended 
+ * 5. tau - time-marching step for explicit scheme
+ * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
+ *
+ * Output:
+ * [1] Regularized image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+float Diffusion_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ)
+{
+    /* Copies Input into Output, then applies iterationsNumb explicit diffusion steps to Output in place; returns Output[0]. */
+    int i;
+    float sigmaPar2 = sigmaPar/sqrt(2.0f); /* rescaled value handed to the nonlinear kernels as their sigmaPar */
+    /* copy into output */
+    copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
+    /* reject an unknown penalty once, up front; the kernels would otherwise print this inside their loops and change nothing */
+    if ((sigmaPar != 0.0f) && ((penaltytype < 1) || (penaltytype > 3))) {
+        printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+        return *Output;
+    }
+    if (dimZ == 1) { /* running 2D diffusion iterations */
+        for(i=0; i < iterationsNumb; i++) {
+            if (sigmaPar == 0.0f) LinearDiff2D(Input, Output, lambdaPar, tau, (long)(dimX), (long)(dimY)); /* linear diffusion (heat equation) */
+            else NonLinearDiff2D(Input, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY)); /* nonlinear diffusion */
+        }
+    } else { /* running 3D diffusion iterations */
+        for(i=0; i < iterationsNumb; i++) {
+            if (sigmaPar == 0.0f) LinearDiff3D(Input, Output, lambdaPar, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
+            else NonLinearDiff3D(Input, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY), (long)(dimZ));
+        }
+    }
+    return *Output;
+}
+
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+/* linear diffusion (heat equation) */
+float LinearDiff2D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY)
+{
+    /* One explicit linear-diffusion step on Output, in place: Output += tau*(lambdaPar*(sum of the 4 neighbour differences) - (Output - Input)). Returns Output[0]. */
+    long i,j,i1,i2,j1,j2,index;
+    float e,w,n,s,e1,w1,n1,s1;
+    /* NOTE(review): Output is read at neighbouring indices and written at index inside the same parallel loop, so values seen near chunk borders can depend on thread timing - confirm whether a separate read buffer is wanted */
+#pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neumann); if dimX == 1 both neighbours fall back to i so no index leaves the array */
+        i1 = (i+1 < dimX) ? i+1 : ((i > 0) ? i-1 : i);
+        i2 = (i > 0) ? i-1 : ((i+1 < dimX) ? i+1 : i);
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann), with the same fallback if dimY == 1 */
+            j1 = (j+1 < dimY) ? j+1 : ((j > 0) ? j-1 : j);
+            j2 = (j > 0) ? j-1 : ((j+1 < dimY) ? j+1 : j);
+            index = j*dimX+i;
+            e = Output[j*dimX+i1];
+            w = Output[j*dimX+i2];
+            n = Output[j1*dimX+i];
+            s = Output[j2*dimX+i];
+            /* differences between each neighbour and the centre value */
+            e1 = e - Output[index];
+            w1 = w - Output[index];
+            n1 = n - Output[index];
+            s1 = s - Output[index];
+            Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
+        }
+    }
+    return *Output;
+}
+
+/* nonlinear diffusion */
+float NonLinearDiff2D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY)
+{
+    /* One explicit nonlinear-diffusion step on Output, in place. Each of the four neighbour differences is passed through
+     * the penalty selected by penaltytype (1 Huber, 2 Perona-Malik, 3 Tukey biweight) with scale sigmaPar. Returns Output[0]. */
+    long i,j,i1,i2,j1,j2,index;
+    float e,w,n,s,e1,w1,n1,s1;
+    /* reject an unknown penalty once, before the loop; Output is left untouched, exactly as when the check sat inside the loop */
+    if ((penaltytype < 1) || (penaltytype > 3)) {
+        printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+        return *Output;
+    }
+    /* NOTE(review): Output is read at neighbouring indices and written at index inside the same parallel loop, so values seen near chunk borders can depend on thread timing - confirm whether a separate read buffer is wanted */
+#pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neumann); if dimX == 1 both neighbours fall back to i so no index leaves the array */
+        i1 = (i+1 < dimX) ? i+1 : ((i > 0) ? i-1 : i);
+        i2 = (i > 0) ? i-1 : ((i+1 < dimX) ? i+1 : i);
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann), with the same fallback if dimY == 1 */
+            j1 = (j+1 < dimY) ? j+1 : ((j > 0) ? j-1 : j);
+            j2 = (j > 0) ? j-1 : ((j+1 < dimY) ? j+1 : j);
+            index = j*dimX+i;
+            e = Output[j*dimX+i1];
+            w = Output[j*dimX+i2];
+            n = Output[j1*dimX+i];
+            s = Output[j2*dimX+i];
+
+            e1 = e - Output[index];
+            w1 = w - Output[index];
+            n1 = n - Output[index];
+            s1 = s - Output[index];
+
+            if (penaltytype == 1) {
+                /* Huber penalty */
+                if (fabs(e1) > sigmaPar) e1 =  signNDFc(e1);
+                else e1 = e1/sigmaPar;
+                if (fabs(w1) > sigmaPar) w1 =  signNDFc(w1);
+                else w1 = w1/sigmaPar;
+                if (fabs(n1) > sigmaPar) n1 =  signNDFc(n1);
+                else n1 = n1/sigmaPar;
+                if (fabs(s1) > sigmaPar) s1 =  signNDFc(s1);
+                else s1 = s1/sigmaPar;
+            }
+            else if (penaltytype == 2) {
+                /* Perona-Malik */
+                e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
+                w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
+                n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
+                s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
+            }
+            else if (penaltytype == 3) {
+                /* Tukey Biweight */
+                if (fabs(e1) <= sigmaPar) e1 =  e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
+                else e1 = 0.0f;
+                if (fabs(w1) <= sigmaPar) w1 =  w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
+                else w1 = 0.0f;
+                if (fabs(n1) <= sigmaPar) n1 =  n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
+                else n1 = 0.0f;
+                if (fabs(s1) <= sigmaPar) s1 =  s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
+                else s1 = 0.0f;
+            }
+            Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
+        }
+    }
+    return *Output;
+}
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/* linear diffusion (heat equation) */
+float LinearDiff3D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY, long dimZ)
+{
+    /* One explicit linear-diffusion step on Output, in place: Output += tau*(lambdaPar*(sum of the 6 neighbour differences) - (Output - Input)). Returns Output[0]. */
+    long i,j,k,i1,i2,j1,j2,k1,k2,index;
+    float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
+    /* NOTE(review): Output is read at neighbouring indices and written at index inside the same parallel loop, so values seen near chunk borders can depend on thread timing - confirm whether a separate read buffer is wanted */
+#pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1,k,k1,k2,u1,d1,u,d)
+    for(k=0; k<dimZ; k++) {
+        /* symmetric boundary conditions (Neumann) on every axis; an axis of length 1 falls back to the index itself so no index leaves the array */
+        k1 = (k+1 < dimZ) ? k+1 : ((k > 0) ? k-1 : k);
+        k2 = (k > 0) ? k-1 : ((k+1 < dimZ) ? k+1 : k);
+        for(i=0; i<dimX; i++) {
+            i1 = (i+1 < dimX) ? i+1 : ((i > 0) ? i-1 : i);
+            i2 = (i > 0) ? i-1 : ((i+1 < dimX) ? i+1 : i);
+            for(j=0; j<dimY; j++) {
+                j1 = (j+1 < dimY) ? j+1 : ((j > 0) ? j-1 : j);
+                j2 = (j > 0) ? j-1 : ((j+1 < dimY) ? j+1 : j);
+                index = (dimX*dimY)*k + j*dimX+i;
+                e = Output[(dimX*dimY)*k + j*dimX+i1];
+                w = Output[(dimX*dimY)*k + j*dimX+i2];
+                n = Output[(dimX*dimY)*k + j1*dimX+i];
+                s = Output[(dimX*dimY)*k + j2*dimX+i];
+                u = Output[(dimX*dimY)*k1 + j*dimX+i];
+                d = Output[(dimX*dimY)*k2 + j*dimX+i];
+                /* differences between each neighbour and the centre value */
+                e1 = e - Output[index];
+                w1 = w - Output[index];
+                n1 = n - Output[index];
+                s1 = s - Output[index];
+                u1 = u - Output[index];
+                d1 = d - Output[index];
+                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));
+            }
+        }
+    }
+    return *Output;
+}
+
+float NonLinearDiff3D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY, long dimZ)
+{
+    /* One explicit nonlinear-diffusion step on Output, in place. Each of the six neighbour differences is passed through
+     * the penalty selected by penaltytype (1 Huber, 2 Perona-Malik, 3 Tukey biweight) with scale sigmaPar. Returns Output[0]. */
+    long i,j,k,i1,i2,j1,j2,k1,k2,index;
+    float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
+    /* reject an unknown penalty once, before the loop; Output is left untouched, exactly as when the check sat inside the loop */
+    if ((penaltytype < 1) || (penaltytype > 3)) {
+        printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+        return *Output;
+    }
+    /* NOTE(review): Output is read at neighbouring indices and written at index inside the same parallel loop, so values seen near chunk borders can depend on thread timing - confirm whether a separate read buffer is wanted */
+#pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1,k,k1,k2,u1,d1,u,d)
+    for(k=0; k<dimZ; k++) {
+        /* symmetric boundary conditions (Neumann); if dimZ == 1 both neighbours fall back to k so no index leaves the array */
+        k1 = (k+1 < dimZ) ? k+1 : ((k > 0) ? k-1 : k);
+        k2 = (k > 0) ? k-1 : ((k+1 < dimZ) ? k+1 : k);
+        for(i=0; i<dimX; i++) {
+            /* symmetric boundary conditions (Neumann), with the same fallback if dimX == 1 */
+            i1 = (i+1 < dimX) ? i+1 : ((i > 0) ? i-1 : i);
+            i2 = (i > 0) ? i-1 : ((i+1 < dimX) ? i+1 : i);
+            for(j=0; j<dimY; j++) {
+                /* symmetric boundary conditions (Neumann), with the same fallback if dimY == 1 */
+                j1 = (j+1 < dimY) ? j+1 : ((j > 0) ? j-1 : j);
+                j2 = (j > 0) ? j-1 : ((j+1 < dimY) ? j+1 : j);
+                index = (dimX*dimY)*k + j*dimX+i;
+                e = Output[(dimX*dimY)*k + j*dimX+i1];
+                w = Output[(dimX*dimY)*k + j*dimX+i2];
+                n = Output[(dimX*dimY)*k + j1*dimX+i];
+                s = Output[(dimX*dimY)*k + j2*dimX+i];
+                u = Output[(dimX*dimY)*k1 + j*dimX+i];
+                d = Output[(dimX*dimY)*k2 + j*dimX+i];
+
+                e1 = e - Output[index];
+                w1 = w - Output[index];
+                n1 = n - Output[index];
+                s1 = s - Output[index];
+                u1 = u - Output[index];
+                d1 = d - Output[index];
+
+                if (penaltytype == 1) {
+                    /* Huber penalty */
+                    if (fabs(e1) > sigmaPar) e1 =  signNDFc(e1);
+                    else e1 = e1/sigmaPar;
+                    if (fabs(w1) > sigmaPar) w1 =  signNDFc(w1);
+                    else w1 = w1/sigmaPar;
+                    if (fabs(n1) > sigmaPar) n1 =  signNDFc(n1);
+                    else n1 = n1/sigmaPar;
+                    if (fabs(s1) > sigmaPar) s1 =  signNDFc(s1);
+                    else s1 = s1/sigmaPar;
+                    if (fabs(u1) > sigmaPar) u1 =  signNDFc(u1);
+                    else u1 = u1/sigmaPar;
+                    if (fabs(d1) > sigmaPar) d1 =  signNDFc(d1);
+                    else d1 = d1/sigmaPar;
+                }
+                else if (penaltytype == 2) {
+                    /* Perona-Malik */
+                    e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
+                    w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
+                    n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
+                    s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
+                    u1 = (u1)/(1.0f + powf((u1/sigmaPar),2));
+                    d1 = (d1)/(1.0f + powf((d1/sigmaPar),2));
+                }
+                else if (penaltytype == 3) {
+                    /* Tukey Biweight */
+                    if (fabs(e1) <= sigmaPar) e1 =  e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
+                    else e1 = 0.0f;
+                    if (fabs(w1) <= sigmaPar) w1 =  w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
+                    else w1 = 0.0f;
+                    if (fabs(n1) <= sigmaPar) n1 =  n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
+                    else n1 = 0.0f;
+                    if (fabs(s1) <= sigmaPar) s1 =  s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
+                    else s1 = 0.0f;
+                    if (fabs(u1) <= sigmaPar) u1 =  u1*powf((1.0f - powf((u1/sigmaPar),2)), 2);
+                    else u1 = 0.0f;
+                    if (fabs(d1) <= sigmaPar) d1 =  d1*powf((1.0f - powf((d1/sigmaPar),2)), 2);
+                    else d1 = 0.0f;
+                }
+
+                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));
+            }
+        }
+    }
+    return *Output;
+}
diff --git a/src/Core/regularisers_CPU/Diffusion_core.h b/src/Core/regularisers_CPU/Diffusion_core.h
new file mode 100644
index 0000000..cc36dad
--- /dev/null
+++ b/src/Core/regularisers_CPU/Diffusion_core.h
@@ -0,0 +1,59 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+
+/* C-OMP implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
+ * The minimisation is performed using explicit scheme. 
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume 
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma); when sigma equals zero, linear diffusion is used instead of nonlinear diffusion
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended 
+ * 5. tau - time-marching step for explicit scheme
+ * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
+ *
+ * Output:
+ * [1] Regularized image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+ 
+#ifdef __cplusplus /* give the declarations below C linkage when this header is compiled as C++ */
+extern "C" {
+#endif
+CCPI_EXPORT float Diffusion_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb,  float tau, int penaltytype, int dimX, int dimY, int dimZ); /* entry point; takes the 2D path when dimZ == 1 and the linear kernels when sigmaPar == 0 */
+CCPI_EXPORT float LinearDiff2D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY); /* updates Output in place */
+CCPI_EXPORT float NonLinearDiff2D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY); /* updates Output in place */
+CCPI_EXPORT float LinearDiff3D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY, long dimZ); /* updates Output in place */
+CCPI_EXPORT float NonLinearDiff3D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY, long dimZ); /* updates Output in place */
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/src/Core/regularisers_CPU/FGP_TV_core.c b/src/Core/regularisers_CPU/FGP_TV_core.c
new file mode 100644
index 0000000..68d58b7
--- /dev/null
+++ b/src/Core/regularisers_CPU/FGP_TV_core.c
@@ -0,0 +1,321 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "FGP_TV_core.h"
+
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume 
+ * 2. lambdaPar - regularization parameter 
+ * 3. Number of iterations
+ * 4. epsilon: tolerance constant 
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: nonnegativity constraint, 0 - OFF (default), 1 - ON 
+ * 7. print information: 0 (off) or 1 (on) 
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+ 
+/* FGP-TV denoising driver: takes the 2D path when dimZ <= 1 and the 3D path otherwise.
+ * Input and Output hold dimX*dimY (2D) or dimX*dimY*dimZ (3D) floats; the result is written to Output.
+ * At most iterationsNumb iterations run; the loop exits on the fifth iteration (not necessarily
+ * consecutive) whose relative change of Output falls below epsil.
+ * methodTV == 0 selects the isotropic projection, any other value the anisotropic one;
+ * nonneg == 1 clamps negative Output values to 0; printM == 1 prints the last iteration index.
+ * Returns the first element of Output. */
+float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+{
+    int ll, count = 0;
+    long j, DimTotal;
+    float re, re1, tk = 1.0f, tkp1 = 1.0f;
+
+    /* NOTE(review): the calloc results below are still used without a NULL check. */
+    if (dimZ <= 1) {
+        /*2D case */
+        float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
+        /* widen each factor first: the product used to be formed in int and could overflow */
+        DimTotal = (long)dimX*(long)dimY;
+        Output_prev = calloc(DimTotal, sizeof(float));
+        P1 = calloc(DimTotal, sizeof(float));
+        P2 = calloc(DimTotal, sizeof(float));
+        P1_prev = calloc(DimTotal, sizeof(float));
+        P2_prev = calloc(DimTotal, sizeof(float));
+        R1 = calloc(DimTotal, sizeof(float));
+        R2 = calloc(DimTotal, sizeof(float));
+
+        /* begin iterations */
+        for(ll=0; ll<iterationsNumb; ll++) {
+            /* computing the gradient of the objective function */
+            Obj_func2D(Input, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
+
+            /* apply nonnegativity */
+            if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+            /*Taking a step towards minus of the gradient*/
+            Grad_func2D(P1, P2, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
+
+            /* projection step */
+            Proj_func2D(P1, P2, methodTV, DimTotal);
+
+            /*updating R and t*/
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            Rupd_func2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
+
+            /* check early stopping criteria: re = ||Output - Output_prev|| / ||Output|| */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<DimTotal; j++) {
+                re += pow(Output[j] - Output_prev[j],2);
+                re1 += pow(Output[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            if (re < epsil) count++;
+            if (count > 4) break;
+
+            /*storing old values*/
+            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
+            copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), 1l);
+            copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), 1l);
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", ll);
+        free(Output_prev); free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2);
+    }
+    else {
+        /*3D case*/
+        float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL;
+        DimTotal = (long)dimX*(long)dimY*(long)dimZ; /* widened before multiplying, see the 2D branch */
+        Output_prev = calloc(DimTotal, sizeof(float));
+        P1 = calloc(DimTotal, sizeof(float));
+        P2 = calloc(DimTotal, sizeof(float));
+        P3 = calloc(DimTotal, sizeof(float));
+        P1_prev = calloc(DimTotal, sizeof(float));
+        P2_prev = calloc(DimTotal, sizeof(float));
+        P3_prev = calloc(DimTotal, sizeof(float));
+        R1 = calloc(DimTotal, sizeof(float));
+        R2 = calloc(DimTotal, sizeof(float));
+        R3 = calloc(DimTotal, sizeof(float));
+
+        /* begin iterations */
+        for(ll=0; ll<iterationsNumb; ll++) {
+            /* computing the gradient of the objective function */
+            Obj_func3D(Input, Output, R1, R2, R3, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            /* apply nonnegativity */
+            if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+            /*Taking a step towards minus of the gradient*/
+            Grad_func3D(P1, P2, P3, Output, R1, R2, R3, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            /* projection step */
+            Proj_func3D(P1, P2, P3, methodTV, DimTotal);
+
+            /*updating R and t*/
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            Rupd_func3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal);
+
+            /* calculate norm - stopping rules*/
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<DimTotal; j++) {
+                re += pow(Output[j] - Output_prev[j],2);
+                re1 += pow(Output[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            /* stop if the norm residual is less than the tolerance EPS */
+            if (re < epsil) count++;
+            if (count > 4) break;
+
+            /*storing old values*/
+            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(P3, P3_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", ll);
+        free(Output_prev); free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3);
+    }
+    return *Output;
+}
+
+/* D = A - lambda*(backward differences of R1 along i and R2 along j, zero neighbour at i == 0 / j == 0); returns D[0] */
+float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY)
+{
+    long i, j, index;
+    float left, up;
+#pragma omp parallel for shared(A,D,R1,R2) private(index,i,j,left,up)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            index = j*dimX + i;
+            left = (i == 0) ? 0.0f : R1[index - 1];
+            up = (j == 0) ? 0.0f : R2[index - dimX];
+            D[index] = A[index] - lambda*(R1[index] + R2[index] - left - up);
+        }}
+    return *D;
+}
+/* P = R + (1/(8*lambda)) * forward differences of D along i and j (0 where i == dimX-1 / j == dimY-1); returns 1 */
+float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda,  long dimX, long dimY)
+{
+    long i, j, index;
+    float dx, dy, multip;
+    multip = 1.0f/(8.0f*lambda);
+#pragma omp parallel for shared(P1,P2,D,R1,R2,multip) private(index,i,j,dx,dy)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            index = j*dimX + i;
+            dx = (i == dimX-1) ? 0.0f : D[index] - D[index + 1];
+            dy = (j == dimY-1) ? 0.0f : D[index] - D[index + dimX];
+            P1[index] = R1[index] + multip*dx;
+            P2[index] = R2[index] + multip*dy;
+        }}
+    return 1;
+}
+/* In-place projection of every pair (P1[i], P2[i]), i < DimTotal. methTV == 0: a pair whose squared
+ * norm exceeds 1 is divided by its norm; otherwise each component is divided by max(1, |component|). Returns 1. */
+float Proj_func2D(float *P1, float *P2, int methTV, long DimTotal)
+{
+    long i;
+    float sqnorm, scale, abs1, abs2;
+    if (methTV == 0) {
+        /* isotropic TV */
+#pragma omp parallel for shared(P1,P2) private(i,sqnorm,scale)
+        for(i=0; i<DimTotal; i++) {
+            sqnorm = powf(P1[i],2) + powf(P2[i],2);
+            if (sqnorm > 1.0f) {
+                scale = 1.0f/sqrtf(sqnorm);
+                P1[i] *= scale;
+                P2[i] *= scale;
+            }
+        }
+    }
+    else {
+        /* anisotropic TV */
+#pragma omp parallel for shared(P1,P2) private(i,abs1,abs2)
+        for(i=0; i<DimTotal; i++) {
+            abs1 = fabs(P1[i]);
+            abs2 = fabs(P2[i]);
+            P1[i] = P1[i]/((abs1 < 1.0f) ? 1.0f : abs1);
+            P2[i] = P2[i]/((abs2 < 1.0f) ? 1.0f : abs2);
+        }
+    }
+    return 1;
+}
+/* R = P + ((tk-1)/tkp1)*(P - P_old) for both components over DimTotal elements; returns 1 */
+float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal)
+{
+    long i;
+    float weight = (tk-1.0f)/tkp1;
+#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,weight) private(i)
+    for(i=0; i<DimTotal; i++) {
+        R1[i] = P1[i] + weight*(P1[i] - P1_old[i]);
+        R2[i] = P2[i] + weight*(P2[i] - P2_old[i]);
+    }
+    return 1;
+}
+
+/* 3D-case related Functions */
+/*****************************************************************/
+/* D = A - lambda*(backward differences of R1, R2, R3 along i, j, k; zero neighbour at index 0 of each axis); returns D[0] */
+float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ)
+{
+    long i, j, k, index;
+    float prevX, prevY, prevZ;
+#pragma omp parallel for shared(A,D,R1,R2,R3) private(index,i,j,k,prevX,prevY,prevZ)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX + i;
+                prevX = (i == 0) ? 0.0f : R1[index - 1];
+                prevY = (j == 0) ? 0.0f : R2[index - dimX];
+                prevZ = (k == 0) ? 0.0f : R3[index - dimX*dimY];
+                D[index] = A[index] - lambda*(R1[index] + R2[index] + R3[index] - prevX - prevY - prevZ);
+            }}}
+    return *D;
+}
+/* P = R + (1/(26*lambda)) * forward differences of D along i, j, k (0 at the last index of each axis); returns 1 */
+float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ)
+{
+    long i, j, k, index;
+    float dx, dy, dz, multip;
+    multip = 1.0f/(26.0f*lambda);
+#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,multip) private(index,i,j,k,dx,dy,dz)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX + i;
+                dx = (i == dimX-1) ? 0.0f : D[index] - D[index + 1];
+                dy = (j == dimY-1) ? 0.0f : D[index] - D[index + dimX];
+                dz = (k == dimZ-1) ? 0.0f : D[index] - D[index + dimX*dimY];
+                P1[index] = R1[index] + multip*dx;
+                P2[index] = R2[index] + multip*dy;
+                P3[index] = R3[index] + multip*dz;
+            }}}
+    return 1;
+}
+/* In-place projection of every triple (P1[i], P2[i], P3[i]), i < DimTotal:
+ *   methTV == 0 (isotropic): a triple whose squared norm exceeds 1 is divided by its norm;
+ *   otherwise (anisotropic): each component is divided by max(1, |component|).
+ * Always returns 1. */
+float Proj_func3D(float *P1, float *P2, float *P3, int methTV, long DimTotal)
+{
+    float val1, val2, val3, denom, sq_denom;
+    long i;
+    if (methTV == 0) {
+        /* isotropic TV; denom was missing from the private clause, so threads raced on it */
+#pragma omp parallel for shared(P1,P2,P3) private(i,denom,sq_denom)
+        for(i=0; i<DimTotal; i++) {
+            denom = powf(P1[i],2) + powf(P2[i],2) + powf(P3[i],2);
+            if (denom > 1.0f) {
+                sq_denom = 1.0f/sqrtf(denom);
+                P1[i] = P1[i]*sq_denom;
+                P2[i] = P2[i]*sq_denom;
+                P3[i] = P3[i]*sq_denom;
+            }
+        }
+    }
+    else {
+#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3)
+        for(i=0; i<DimTotal; i++) {
+            val1 = fabs(P1[i]); if (val1 < 1.0f) {val1 = 1.0f;}
+            val2 = fabs(P2[i]); if (val2 < 1.0f) {val2 = 1.0f;}
+            val3 = fabs(P3[i]); if (val3 < 1.0f) {val3 = 1.0f;}
+            P1[i] = P1[i]/val1;
+            P2[i] = P2[i]/val2;
+            P3[i] = P3[i]/val3;
+        }
+    }
+    return 1;
+}
+/* R = P + ((tk-1)/tkp1)*(P - P_old) for all three components over DimTotal elements; returns 1 */
+float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal)
+{
+    long i;
+    float weight = (tk-1.0f)/tkp1;
+#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,weight) private(i)
+    for(i=0; i<DimTotal; i++) {
+        R1[i] = P1[i] + weight*(P1[i] - P1_old[i]);
+        R2[i] = P2[i] + weight*(P2[i] - P2_old[i]);
+        R3[i] = P3[i] + weight*(P3[i] - P3_old[i]);
+    }
+    return 1;
+}
diff --git a/src/Core/regularisers_CPU/FGP_TV_core.h b/src/Core/regularisers_CPU/FGP_TV_core.h
new file mode 100644
index 0000000..3418604
--- /dev/null
+++ b/src/Core/regularisers_CPU/FGP_TV_core.h
@@ -0,0 +1,63 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume 
+ * 2. lambda - regularization parameter 
+ * 3. Number of iterations
+ * 4. epsilon: tolerance constant 
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: nonnegativity constraint, 0 - OFF (default), 1 - ON 
+ * 7. print information: 0 (off) or 1 (on) 
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+ 
+#ifdef __cplusplus
+extern "C" { /* give the declarations below C linkage when included from C++ */
+#endif
+CCPI_EXPORT float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+/* 2D kernels, called by TV_FGP_CPU_main when dimZ <= 1 */
+CCPI_EXPORT float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY);
+CCPI_EXPORT float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, long dimX, long dimY);
+CCPI_EXPORT float Proj_func2D(float *P1, float *P2, int methTV, long DimTotal);
+CCPI_EXPORT float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal);
+/* 3D kernels, called by TV_FGP_CPU_main when dimZ > 1 */
+CCPI_EXPORT float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float Proj_func3D(float *P1, float *P2, float *P3, int methTV, long DimTotal);
+CCPI_EXPORT float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal);
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/src/Core/regularisers_CPU/FGP_dTV_core.c b/src/Core/regularisers_CPU/FGP_dTV_core.c
new file mode 100644
index 0000000..17b75ff
--- /dev/null
+++ b/src/Core/regularisers_CPU/FGP_dTV_core.c
@@ -0,0 +1,441 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "FGP_dTV_core.h"
+
+/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is being fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: nonnegativity constraint, 0 - OFF (default), 1 - ON [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab's codes and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+ 
+/* FGP-dTV denoising driver: takes the 2D path when dimZ <= 1 and the 3D path otherwise.
+ * Input, InputRef and Output hold dimX*dimY (2D) or dimX*dimY*dimZ (3D) floats; Output gets the result.
+ * The eta-smoothed, normalised gradient field of InputRef is computed once and then used on every
+ * iteration to project R (ProjectVect_func*) and the gradient step (Grad_dfunc*).
+ * At most iterationsNumb iterations run; the loop exits on the fifth iteration (not necessarily
+ * consecutive) whose relative change of Output falls below epsil. methodTV == 0: isotropic projection,
+ * else anisotropic; nonneg == 1 clamps Output at 0; printM == 1 prints the last iteration index. Returns Output[0]. */
+float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+{
+    int ll, count = 0;
+    long j, DimTotal;
+    float re, re1, tk = 1.0f, tkp1 = 1.0f;
+
+    /* NOTE(review): the calloc results below are still used without a NULL check. */
+    if (dimZ <= 1) {
+        /*2D case */
+        float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL, *InputRef_x=NULL, *InputRef_y=NULL;
+        /* widen each factor first: the product used to be formed in int and could overflow */
+        DimTotal = (long)dimX*(long)dimY;
+        Output_prev = calloc(DimTotal, sizeof(float));
+        P1 = calloc(DimTotal, sizeof(float));
+        P2 = calloc(DimTotal, sizeof(float));
+        P1_prev = calloc(DimTotal, sizeof(float));
+        P2_prev = calloc(DimTotal, sizeof(float));
+        R1 = calloc(DimTotal, sizeof(float));
+        R2 = calloc(DimTotal, sizeof(float));
+        InputRef_x = calloc(DimTotal, sizeof(float));
+        InputRef_y = calloc(DimTotal, sizeof(float));
+
+        /* calculate gradient field (smoothed) for the reference image */
+        GradNorm_func2D(InputRef, InputRef_x, InputRef_y, eta, (long)(dimX), (long)(dimY));
+
+        /* begin iterations */
+        for(ll=0; ll<iterationsNumb; ll++) {
+            /*projects a 2D vector field R-1,2 onto the orthogonal complement of another 2D vector field InputRef_xy*/
+            ProjectVect_func2D(R1, R2, InputRef_x, InputRef_y, (long)(dimX), (long)(dimY));
+
+            /* computing the gradient of the objective function */
+            Obj_dfunc2D(Input, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
+
+            /* apply nonnegativity */
+            if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+            /*Taking a step towards minus of the gradient*/
+            Grad_dfunc2D(P1, P2, Output, R1, R2, InputRef_x, InputRef_y, lambdaPar, (long)(dimX), (long)(dimY));
+
+            /* projection step */
+            Proj_dfunc2D(P1, P2, methodTV, DimTotal);
+
+            /*updating R and t*/
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            Rupd_dfunc2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
+
+            /* check early stopping criteria: re = ||Output - Output_prev|| / ||Output|| */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<DimTotal; j++) {
+                re += pow(Output[j] - Output_prev[j],2);
+                re1 += pow(Output[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            if (re < epsil) count++;
+            if (count > 4) break;
+
+            /*storing old values*/
+            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
+            copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), 1l);
+            copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), 1l);
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", ll);
+        free(Output_prev); free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2); free(InputRef_x); free(InputRef_y);
+    }
+    else {
+        /*3D case*/
+        float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *InputRef_z=NULL;
+        DimTotal = (long)dimX*(long)dimY*(long)dimZ; /* widened before multiplying, see the 2D branch */
+        Output_prev = calloc(DimTotal, sizeof(float));
+        P1 = calloc(DimTotal, sizeof(float));
+        P2 = calloc(DimTotal, sizeof(float));
+        P3 = calloc(DimTotal, sizeof(float));
+        P1_prev = calloc(DimTotal, sizeof(float));
+        P2_prev = calloc(DimTotal, sizeof(float));
+        P3_prev = calloc(DimTotal, sizeof(float));
+        R1 = calloc(DimTotal, sizeof(float));
+        R2 = calloc(DimTotal, sizeof(float));
+        R3 = calloc(DimTotal, sizeof(float));
+        InputRef_x = calloc(DimTotal, sizeof(float));
+        InputRef_y = calloc(DimTotal, sizeof(float));
+        InputRef_z = calloc(DimTotal, sizeof(float));
+
+        /* calculate gradient field (smoothed) for the reference volume */
+        GradNorm_func3D(InputRef, InputRef_x, InputRef_y, InputRef_z, eta, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+        /* begin iterations */
+        for(ll=0; ll<iterationsNumb; ll++) {
+            /*projects a 3D vector field R-1,2,3 onto the orthogonal complement of another 3D vector field InputRef_xyz*/
+            ProjectVect_func3D(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            /* computing the gradient of the objective function */
+            Obj_dfunc3D(Input, Output, R1, R2, R3, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            /* apply nonnegativity */
+            if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+            /*Taking a step towards minus of the gradient*/
+            Grad_dfunc3D(P1, P2, P3, Output, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            /* projection step */
+            Proj_dfunc3D(P1, P2, P3, methodTV, DimTotal);
+
+            /*updating R and t*/
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            Rupd_dfunc3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal);
+
+            /* calculate norm - stopping rules*/
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<DimTotal; j++) {
+                re += pow(Output[j] - Output_prev[j],2);
+                re1 += pow(Output[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            /* stop if the norm residual is less than the tolerance EPS */
+            if (re < epsil) count++;
+            if (count > 4) break;
+
+            /*storing old values*/
+            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(P3, P3_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", ll);
+        free(Output_prev); free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3); free(InputRef_x); free(InputRef_y); free(InputRef_z);
+    }
+    return *Output;
+}
+
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+
+/* For the reference image B, stores the forward differences along i and j, each divided by
+ * sqrt(gradX^2 + gradY^2 + eta^2), in B_x and B_y. The neighbour past the last index of an
+ * axis is taken as 0. Always returns 1. */
+float GradNorm_func2D(float *B, float *B_x, float *B_y, float eta, long dimX, long dimY)
+{
+    long i, j, index;
+    float gradX, gradY, magn;
+#pragma omp parallel for shared(B, B_x, B_y) private(i,j,index,gradX,gradY,magn)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            index = j*dimX + i;
+            gradX = ((i == dimX-1) ? 0.0f : B[index + 1]) - B[index];
+            gradY = ((j == dimY-1) ? 0.0f : B[index + dimX]) - B[index];
+            magn = pow(gradX,2) + pow(gradY,2);
+            magn = sqrt(magn + pow(eta,2)); /* the eta-smoothed gradients magnitude */
+            B_x[index] = gradX/magn;
+            B_y[index] = gradY/magn;
+        }}
+    return 1;
+}
+
+/* Per pixel, subtracts from (R1, R2) its inner product with (B_x, B_y) times (B_x, B_y), in place; returns 1 */
+float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, long dimX, long dimY)
+{
+    long i, j, index;
+    float dot;
+#pragma omp parallel for shared(R1, R2, B_x, B_y) private(index,i,j,dot)
+    for(i=0; i<dimX; i++) {
+        for(j=0, index=i; j<dimY; j++, index+=dimX) {   /* index == j*dimX + i */
+            dot = R1[index]*B_x[index] + R2[index]*B_y[index];
+            R1[index] = R1[index] - dot*B_x[index];
+            R2[index] = R2[index] - dot*B_y[index];
+        }}
+    return 1;
+}
+
+/* D = A - lambda*(backward differences of R1 along i and R2 along j, zero neighbour at i == 0 / j == 0); returns D[0] */
+float Obj_dfunc2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY)
+{
+    long i, j, index;
+    float left, up;
+#pragma omp parallel for shared(A,D,R1,R2) private(index,i,j,left,up)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            index = j*dimX + i;
+            left = (i == 0) ? 0.0f : R1[index - 1];
+            up = (j == 0) ? 0.0f : R2[index - dimX];
+            D[index] = A[index] - lambda*(R1[index] + R2[index] - left - up);
+        }}
+    return *D;
+}
+/* Forms the forward differences of D along i and j (0 where i == dimX-1 / j == dimY-1), removes
+ * their component along (B_x, B_y) at the same pixel, and stores P = R + (1/(8*lambda)) * result.
+ * Always returns 1. */
+float Grad_dfunc2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, long dimX, long dimY)
+{
+    long i, j, index;
+    float dx, dy, dot, multip;
+    multip = 1.0f/(8.0f*lambda);
+#pragma omp parallel for shared(P1,P2,D,R1,R2,B_x,B_y,multip) private(i,j,index,dx,dy,dot)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            index = j*dimX + i;
+            dx = (i == dimX-1) ? 0.0f : D[index] - D[index + 1];
+            dy = (j == dimY-1) ? 0.0f : D[index] - D[index + dimX];
+            /* inner product with the reference field, then subtract that component */
+            dot = dx*B_x[index] + dy*B_y[index];
+            dx = dx - dot*B_x[index];
+            dy = dy - dot*B_y[index];
+            P1[index] = R1[index] + multip*dx;
+            P2[index] = R2[index] + multip*dy;
+        }}
+    return 1;
+}
+/* In-place projection of every pair (P1[i], P2[i]), i < DimTotal. methTV == 0: a pair whose squared
+ * norm exceeds 1 is divided by its norm; otherwise each component is divided by max(1, |component|). Returns 1. */
+float Proj_dfunc2D(float *P1, float *P2, int methTV, long DimTotal)
+{
+    long i;
+    float sqnorm, scale, abs1, abs2;
+    if (methTV == 0) {
+        /* isotropic TV */
+#pragma omp parallel for shared(P1,P2) private(i,sqnorm,scale)
+        for(i=0; i<DimTotal; i++) {
+            sqnorm = powf(P1[i],2) + powf(P2[i],2);
+            if (sqnorm > 1.0f) {
+                scale = 1.0f/sqrtf(sqnorm);
+                P1[i] *= scale;
+                P2[i] *= scale;
+            }
+        }
+    }
+    else {
+        /* anisotropic TV */
+#pragma omp parallel for shared(P1,P2) private(i,abs1,abs2)
+        for(i=0; i<DimTotal; i++) {
+            abs1 = fabs(P1[i]);
+            abs2 = fabs(P2[i]);
+            P1[i] = P1[i]/((abs1 < 1.0f) ? 1.0f : abs1);
+            P2[i] = P2[i]/((abs2 < 1.0f) ? 1.0f : abs2);
+        }
+    }
+    return 1;
+}
+/* R = P + ((tk-1)/tkp1)*(P - P_old) for both components over DimTotal elements; returns 1 */
+float Rupd_dfunc2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal)
+{
+    long i;
+    float weight = (tk-1.0f)/tkp1;
+#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,weight) private(i)
+    for(i=0; i<DimTotal; i++) {
+        R1[i] = P1[i] + weight*(P1[i] - P1_old[i]);
+        R2[i] = P2[i] + weight*(P2[i] - P2_old[i]);
+    }
+    return 1;
+}
+
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/* For the reference volume B, stores the forward differences along i, j, k, each divided by
+ * sqrt(gradX^2 + gradY^2 + gradZ^2 + eta^2), in B_x, B_y, B_z. Always returns 1. */
+float GradNorm_func3D(float *B, float *B_x, float *B_y, float *B_z, float eta, long dimX, long dimY, long dimZ)
+{
+    long i, j, k, index;
+    float nextX, nextY, nextZ, gradX, gradY, gradZ, magn;
+#pragma omp parallel for shared(B, B_x, B_y, B_z) private(i,j,k,index,nextX,nextY,nextZ,gradX,gradY,gradZ,magn)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX + i;
+                /* zero boundary conditions: the neighbour past the last index of an axis is 0 */
+                nextX = (i == dimX-1) ? 0.0f : B[index + 1];
+                nextY = (j == dimY-1) ? 0.0f : B[index + dimX];
+                nextZ = (k == dimZ-1) ? 0.0f : B[index + dimX*dimY];
+                gradX = nextX - B[index];
+                gradY = nextY - B[index];
+                gradZ = nextZ - B[index];
+                magn = pow(gradX,2) + pow(gradY,2) + pow(gradZ,2);
+                magn = sqrt(magn + pow(eta,2)); /* the eta-smoothed gradients magnitude */
+                B_x[index] = gradX/magn;
+                B_y[index] = gradY/magn;
+                B_z[index] = gradZ/magn;
+            }}}
+    return 1;
+}
+
+/* Per voxel, subtracts from (R1, R2, R3) its inner product with (B_x, B_y, B_z) times (B_x, B_y, B_z), in place; returns 1 */
+float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, long dimX, long dimY, long dimZ)
+{
+    long i, j, k, index;
+    float dot;
+#pragma omp parallel for shared(R1, R2, R3, B_x, B_y, B_z) private(index,i,j,k,dot)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0, index=j*dimX+i; k<dimZ; k++, index+=dimX*dimY) {   /* index == (dimX*dimY)*k + j*dimX + i */
+                dot = R1[index]*B_x[index] + R2[index]*B_y[index] + R3[index]*B_z[index];
+                R1[index] = R1[index] - dot*B_x[index];
+                R2[index] = R2[index] - dot*B_y[index];
+                R3[index] = R3[index] - dot*B_z[index];
+            }}}
+    return 1;
+}
+
+/* D = A - lambda*(backward differences of R1, R2, R3 along i, j, k; zero neighbour at index 0 of each axis); returns D[0] */
+float Obj_dfunc3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ)
+{
+    long i, j, k, index;
+    float prevX, prevY, prevZ;
+#pragma omp parallel for shared(A,D,R1,R2,R3) private(index,i,j,k,prevX,prevY,prevZ)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX + i;
+                prevX = (i == 0) ? 0.0f : R1[index - 1];
+                prevY = (j == 0) ? 0.0f : R2[index - dimX];
+                prevZ = (k == 0) ? 0.0f : R3[index - dimX*dimY];
+                D[index] = A[index] - lambda*(R1[index] + R2[index] + R3[index] - prevX - prevY - prevZ);
+            }}}
+    return *D;
+}
+float Grad_dfunc3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, long dimX, long dimY, long dimZ)
+{
+    /* P = R + 1/(26*lambda) * (g - (g.B)*B), where g holds the differences D[here] - D[next] along i, j, k; always returns 1. */
+    long i, j, k;
+    const long sliceLen = dimX*dimY; /* elements per z-slice */
+    float step = (1.0f/(26.0f*lambda));
+#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,step) private(i,j,k)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                const long pos = sliceLen*k + j*dimX + i;
+                /* boundary conditions: the difference is 0 where there is no next sample */
+                float gX = (i == dimX-1) ? 0.0f : D[pos] - D[pos + 1];
+                float gY = (j == dimY-1) ? 0.0f : D[pos] - D[pos + dimX];
+                float gZ = (k == dimZ-1) ? 0.0f : D[pos] - D[pos + sliceLen];
+                /* inner product of the difference vector with (B_x,B_y,B_z) */
+                const float dot = gX*B_x[pos] + gY*B_y[pos] + gZ*B_z[pos];
+                gX = gX - dot*B_x[pos];
+                gY = gY - dot*B_y[pos];
+                gZ = gZ - dot*B_z[pos];
+                P1[pos] = R1[pos] + step*gX;
+                P2[pos] = R2[pos] + step*gY;
+                P3[pos] = R3[pos] + step*gZ;
+            }}}
+    return 1;
+}
+float Proj_dfunc3D(float *P1, float *P2, float *P3, int methTV, long DimTotal)
+{
+    /* In-place projection of (P1,P2,P3): methTV == 0 rescales vectors whose Euclidean norm exceeds 1 to unit length,
+     * any other methTV divides each component by max(|component|, 1). Always returns 1. */
+    long i;
+    if (methTV == 0) {
+        /* isotropic TV; temporaries are loop-local so every OpenMP thread owns its copy (a shared 'denom' would be a data race) */
+#pragma omp parallel for shared(P1,P2,P3) private(i)
+        for(i=0; i<DimTotal; i++) {
+            const float denom = powf(P1[i],2) + powf(P2[i],2) + powf(P3[i],2);
+            if (denom > 1.0f) {
+                const float sq_denom = 1.0f/sqrtf(denom);
+                P1[i] = P1[i]*sq_denom;
+                P2[i] = P2[i]*sq_denom;
+                P3[i] = P3[i]*sq_denom;
+            }
+        }
+    }
+    else { /* anisotropic TV */
+#pragma omp parallel for shared(P1,P2,P3) private(i)
+        for(i=0; i<DimTotal; i++) {
+            float val1 = fabs(P1[i]);
+            float val2 = fabs(P2[i]);
+            float val3 = fabs(P3[i]);
+            if (val1 < 1.0f) {val1 = 1.0f;}
+            if (val2 < 1.0f) {val2 = 1.0f;}
+            if (val3 < 1.0f) {val3 = 1.0f;}
+            P1[i] = P1[i]/val1;
+            P2[i] = P2[i]/val2;
+            P3[i] = P3[i]/val3;
+        }
+    }
+    return 1;
+}
+float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal)
+{
+    /* R = P + ((tk-1)/tkp1) * (P - P_old) for each of the three components; always returns 1. */
+    long n;
+    float weight = ((tk-1.0f)/tkp1);
+#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,weight) private(n)
+    for(n=0; n<DimTotal; n++) {
+        R1[n] = P1[n] + weight*(P1[n] - P1_old[n]);
+        R2[n] = P2[n] + weight*(P2[n] - P2_old[n]);
+        R3[n] = P3[n] + weight*(P3[n] - P3_old[n]);
+    }
+    return 1;
+}
diff --git a/src/Core/regularisers_CPU/FGP_dTV_core.h b/src/Core/regularisers_CPU/FGP_dTV_core.h
new file mode 100644
index 0000000..442dd30
--- /dev/null
+++ b/src/Core/regularisers_CPU/FGP_dTV_core.h
@@ -0,0 +1,72 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is being fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: nonnegativity (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab code and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+ 
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+
+CCPI_EXPORT float GradNorm_func2D(float *B, float *B_x, float *B_y, float eta, long dimX, long dimY);
+CCPI_EXPORT float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, long dimX, long dimY);
+CCPI_EXPORT float Obj_dfunc2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY);
+CCPI_EXPORT float Grad_dfunc2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, long dimX, long dimY);
+CCPI_EXPORT float Proj_dfunc2D(float *P1, float *P2, int methTV, long DimTotal);
+CCPI_EXPORT float Rupd_dfunc2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal);
+
+CCPI_EXPORT float GradNorm_func3D(float *B, float *B_x, float *B_y, float *B_z, float eta, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float Obj_dfunc3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float Grad_dfunc3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float Proj_dfunc3D(float *P1, float *P2, float *P3, int methTV, long DimTotal);
+CCPI_EXPORT float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/LLT_ROF_core.c b/src/Core/regularisers_CPU/LLT_ROF_core.c
new file mode 100644
index 0000000..8416a14
--- /dev/null
+++ b/src/Core/regularisers_CPU/LLT_ROF_core.c
@@ -0,0 +1,410 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "LLT_ROF_core.h"
+#define EPS_LLT 0.01
+#define EPS_ROF 1.0e-12
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+/* sign function: -1 for negative x, +1 for positive x, 0 when neither comparison holds */
+int signLLT(float x) {
+    return (x < 0) ? -1 : (x > 0);
+}
+
+/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
+ * 
+* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well. 
+* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase 
+* lambdaLLT starting with smaller values. 
+*
+* Input Parameters:
+* 1. U0 - original noisy image/volume
+* 2. lambdaROF - ROF-related regularisation parameter
+* 3. lambdaLLT - LLT-related regularisation parameter
+* 4. tau - time-marching step 
+* 5. iter - iterations number (for both models)
+*
+* Output:
+* Filtered/regularised image
+*
+* References: 
+* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
+* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+*/
+
+float LLT_ROF_CPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int dimX, int dimY, int dimZ)
+{
+    /* Copies Input to Output, then applies iterationsNumb joint ROF + LLT update steps to Output.
+     * dimZ == 1 selects the 2D routines, anything else the 3D ones. Returns Output[0]; if a work
+     * buffer cannot be allocated, Output stays a copy of Input and 0 is returned. */
+    const long nx = (long)(dimX), ny = (long)(dimY), nz = (long)(dimZ);
+    /* widen each extent before multiplying so the product is not formed in int arithmetic (helps where long is wider than int) */
+    const long DimTotal = nx*ny*nz;
+    const int is3D = (dimZ != 1);
+    int ll;
+    float *D1_LLT=NULL, *D2_LLT=NULL, *D3_LLT=NULL, *D1_ROF=NULL, *D2_ROF=NULL, *D3_ROF=NULL;
+
+    copyIm(Input, Output, nx, ny, nz); /* initialize */
+    D1_ROF = calloc(DimTotal, sizeof(float));
+    D2_ROF = calloc(DimTotal, sizeof(float));
+    D1_LLT = calloc(DimTotal, sizeof(float));
+    D2_LLT = calloc(DimTotal, sizeof(float));
+    if (is3D) { /* the third-direction buffers are only passed to the 3D routines */
+        D3_ROF = calloc(DimTotal, sizeof(float));
+        D3_LLT = calloc(DimTotal, sizeof(float));
+    }
+    /* calloc may return NULL for a zero-size request, so only a non-empty volume counts as a failure */
+    if (DimTotal > 0 && (!D1_ROF || !D2_ROF || !D1_LLT || !D2_LLT || (is3D && (!D3_ROF || !D3_LLT)))) {
+        fprintf(stderr, "LLT_ROF_CPU_main: unable to allocate work buffers\n");
+        free(D1_LLT);free(D2_LLT);free(D3_LLT);free(D1_ROF);free(D2_ROF);free(D3_ROF);
+        return 0.0f;
+    }
+    for(ll = 0; ll < iterationsNumb; ll++) {
+        if (!is3D) {
+            /* 2D case: first-order differences (ROF), second-order derivatives (LLT), then the joint update */
+            D1_func_ROF(Output, D1_ROF, nx, ny, 1l);
+            D2_func_ROF(Output, D2_ROF, nx, ny, 1l);
+            der2D_LLT(Output, D1_LLT, D2_LLT, nx, ny, 1l);
+            Update2D_LLT_ROF(Input, Output, D1_LLT, D2_LLT, D1_ROF, D2_ROF, lambdaROF, lambdaLLT, tau, nx, ny, 1l);
+        }
+        else {
+            /* 3D case: first-order differences (ROF), second-order derivatives (LLT), then the joint update */
+            D1_func_ROF(Output, D1_ROF, nx, ny, nz);
+            D2_func_ROF(Output, D2_ROF, nx, ny, nz);
+            D3_func_ROF(Output, D3_ROF, nx, ny, nz);
+            der3D_LLT(Output, D1_LLT, D2_LLT, D3_LLT, nx, ny, nz);
+            Update3D_LLT_ROF(Input, Output, D1_LLT, D2_LLT, D3_LLT, D1_ROF, D2_ROF, D3_ROF, lambdaROF, lambdaLLT, tau, nx, ny, nz);
+        }
+    } /*end of iterations*/
+    free(D1_LLT);free(D2_LLT);free(D3_LLT);
+    free(D1_ROF);free(D2_ROF);free(D3_ROF);
+    return *Output;
+}
+
+/*************************************************************************/
+/**********************LLT-related functions *****************************/
+/*************************************************************************/
+float der2D_LLT(float *U, float *D1, float *D2, long dimX, long dimY, long dimZ)
+{
+    /* D1 = dxx/(|dxx| + EPS_LLT) and D2 = dyy/(|dyy| + EPS_LLT) from central second differences of U; dimZ is not read; always returns 1. */
+    long i, j;
+#pragma omp parallel for shared(U,D1,D2) private(i, j)
+    for (i = 0; i<dimX; i++) {
+        for (j = 0; j<dimY; j++) {
+            const long pos = j*dimX + i;
+            /* symmetric boundary conditions (Neumann): an out-of-range neighbour index is reflected */
+            const long iNext = (i + 1 == dimX) ? i - 1 : i + 1;
+            const long iPrev = (i - 1 < 0) ? i + 1 : i - 1;
+            const long jNext = (j + 1 == dimY) ? j - 1 : j + 1;
+            const long jPrev = (j - 1 < 0) ? j + 1 : j - 1;
+            /* second differences along i and along j */
+            const float dxx = U[j*dimX+iNext] - 2.0f*U[pos] + U[j*dimX+iPrev];
+            const float dyy = U[jNext*dimX+i] - 2.0f*U[pos] + U[jPrev*dimX+i];
+            /* EPS_LLT keeps the denominators strictly positive */
+            const float denom_xx = fabs(dxx) + EPS_LLT;
+            const float denom_yy = fabs(dyy) + EPS_LLT;
+            /* normalised second differences */
+            D1[pos] = dxx / denom_xx;
+            D2[pos] = dyy / denom_yy;
+        }
+    }
+    return 1;
+}
+
+float der3D_LLT(float *U, float *D1, float *D2, float *D3, long dimX, long dimY, long dimZ)
+{
+    /* D1, D2, D3 = d/(|d| + EPS_LLT) for the central second differences d of U along i, j and k; always returns 1. */
+    long i, j, k;
+    const long sliceLen = dimX*dimY; /* elements per z-slice */
+#pragma omp parallel for shared(U,D1,D2,D3) private(i, j, k)
+    for (i = 0; i<dimX; i++) {
+        for (j = 0; j<dimY; j++) {
+            for (k = 0; k<dimZ; k++) {
+                /* symmetric boundary conditions (Neumann): an out-of-range neighbour index is reflected */
+                const long iNext = (i + 1 == dimX) ? i - 1 : i + 1;
+                const long iPrev = (i - 1 < 0) ? i + 1 : i - 1;
+                const long jNext = (j + 1 == dimY) ? j - 1 : j + 1;
+                const long jPrev = (j - 1 < 0) ? j + 1 : j - 1;
+                const long kNext = (k + 1 == dimZ) ? k - 1 : k + 1;
+                const long kPrev = (k - 1 < 0) ? k + 1 : k - 1;
+                const long row = sliceLen*k + j*dimX; /* offset of the current row */
+                const long pos = row + i;
+                /* second differences along i, j and k */
+                const float dxx = U[row + iNext] - 2.0f*U[pos] + U[row + iPrev];
+                const float dyy = U[sliceLen*k + jNext*dimX+i] - 2.0f*U[pos] + U[sliceLen*k + jPrev*dimX+i];
+                const float dzz = U[sliceLen*kNext + j*dimX+i] - 2.0f*U[pos] + U[sliceLen*kPrev + j*dimX+i];
+                /* EPS_LLT keeps the denominators strictly positive */
+                const float denom_xx = fabs(dxx) + EPS_LLT;
+                const float denom_yy = fabs(dyy) + EPS_LLT;
+                const float denom_zz = fabs(dzz) + EPS_LLT;
+                D1[pos] = dxx / denom_xx;
+                D2[pos] = dyy / denom_yy;
+                D3[pos] = dzz / denom_zz;
+            }
+        }
+    }
+    return 1;
+}
+
+/*************************************************************************/
+/**********************ROF-related functions *****************************/
+/*************************************************************************/
+
+/* D1_func_ROF: stores in D1 the forward difference NOMx_1 of A divided by T1, the square root of summed squared differences plus EPS_ROF; returns D1[0]. */
+float D1_func_ROF(float *A, float *D1, long dimX, long dimY, long dimZ)
+{
+    float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
+    long i,j,k,i1,i2,k1,j1,j2,k2,index;
+    /* dimZ > 1 takes the volume branch; otherwise A is indexed as a single dimX-by-dimY slice */
+    if (dimZ > 1) {
+#pragma omp parallel for shared (A, D1, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2, NOMx_1,NOMy_1,NOMy_0,NOMz_1,NOMz_0,denom1,denom2,denom3,T1)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                for(k=0; k<dimZ; k++) {
+					index = (dimX*dimY)*k + j*dimX+i;
+                    /* symmetric boundary conditions (Neumann): an out-of-range neighbour index is reflected back inside */
+                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+                    i2 = i - 1; if (i2 < 0) i2 = i+1;
+                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+                    j2 = j - 1; if (j2 < 0) j2 = j+1;
+                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+                    k2 = k - 1; if (k2 < 0) k2 = k+1;
+                    /* j2 is computed above but not read in this function */
+                    /* Forward-backward differences; NOTE(review): the "x" differences step along j and the "y" ones along i */
+                    NOMx_1 = A[(dimX*dimY)*k + j1*dimX + i] - A[index]; /* x+ */
+                    NOMy_1 = A[(dimX*dimY)*k + j*dimX + i1] - A[index]; /* y+ */
+                    /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */  /* x- */
+                    NOMy_0 = A[index] - A[(dimX*dimY)*k + j*dimX + i2]; /* y- */
+                    
+                    NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
+                    NOMz_0 = A[index] - A[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
+                    
+                    /* denom2/denom3: squared minmod, 0.5*(sign(a)+sign(b))*min(|a|,|b|), of the forward and backward differences */
+                    denom1 = NOMx_1*NOMx_1;
+                    denom2 = 0.5f*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
+                    denom2 = denom2*denom2;
+                    denom3 = 0.5f*(signLLT(NOMz_1) + signLLT(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0)));
+                    denom3 = denom3*denom3;
+                    T1 = sqrt(denom1 + denom2 + denom3 + EPS_ROF); /* double-precision sqrt here; the 2D branch below uses sqrtf */
+                    D1[index] = NOMx_1/T1;
+                }}}
+    }
+    else {
+#pragma omp parallel for shared (A, D1, dimX, dimY) private(i, j, i1, j1, i2, j2,NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1,index)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+				index = j*dimX+i;
+                /* symmetric boundary conditions (Neumann): an out-of-range neighbour index is reflected back inside */
+                i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+                i2 = i - 1; if (i2 < 0) i2 = i+1;
+                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+                j2 = j - 1; if (j2 < 0) j2 = j+1;
+                /* j2 is computed above but not read in this function */
+                /* Forward-backward differences */
+                NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
+                NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
+                /*NOMx_0 = (A[(i)*dimY + j] - A[(i2)*dimY + j]); */ /* x- */
+                NOMy_0 = A[index] - A[(j)*dimX + i2]; /* y- */
+                
+                denom1 = NOMx_1*NOMx_1;
+                denom2 = 0.5f*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
+                denom2 = denom2*denom2;
+                T1 = sqrtf(denom1 + denom2 + EPS_ROF);
+                D1[index] = NOMx_1/T1;
+            }}
+    }
+    return *D1;
+}
+/* D2_func_ROF: stores in D2 the forward difference NOMy_1 of A divided by T2, the square root of summed squared differences plus EPS_ROF; returns D2[0]. */
+float D2_func_ROF(float *A, float *D2, long dimX, long dimY, long dimZ)
+{
+    float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
+    long i,j,k,i1,i2,k1,j1,j2,k2,index;
+    /* dimZ > 1 takes the volume branch; otherwise A is indexed as a single dimX-by-dimY slice */
+    if (dimZ > 1) {
+#pragma omp parallel for shared (A, D2, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2,  NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                for(k=0; k<dimZ; k++) {
+					index = (dimX*dimY)*k + j*dimX+i;
+                    /* symmetric boundary conditions (Neumann): an out-of-range neighbour index is reflected back inside */
+                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+                    i2 = i - 1; if (i2 < 0) i2 = i+1;
+                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+                    j2 = j - 1; if (j2 < 0) j2 = j+1;
+                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+                    k2 = k - 1; if (k2 < 0) k2 = k+1;
+                    /* i2 is computed above but not read in this function */
+                    
+                    /* Forward-backward differences; NOTE(review): the "x" differences step along j and the "y" ones along i */
+                    NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
+                    NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
+                    NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+                    NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
+                    NOMz_0 = A[index] - A[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
+                    
+                    /* denom2/denom3: squared minmod, 0.5*(sign(a)+sign(b))*min(|a|,|b|), of the forward and backward differences */
+                    denom1 = NOMy_1*NOMy_1;
+                    denom2 = 0.5f*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
+                    denom2 = denom2*denom2;
+                    denom3 = 0.5f*(signLLT(NOMz_1) + signLLT(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0)));
+                    denom3 = denom3*denom3;
+                    T2 = sqrtf(denom1 + denom2 + denom3 + EPS_ROF);
+                    D2[index] = NOMy_1/T2;
+                }}}
+    }
+    else {
+#pragma omp parallel for shared (A, D2, dimX, dimY) private(i, j, i1, j1, i2, j2, NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2,index)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+				index = j*dimX+i;
+                /* symmetric boundary conditions (Neumann): an out-of-range neighbour index is reflected back inside */
+                i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+                i2 = i - 1; if (i2 < 0) i2 = i+1;
+                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+                j2 = j - 1; if (j2 < 0) j2 = j+1;
+                /* i2 is computed above but not read in this function */
+                /* Forward-backward differences */
+                NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
+                NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
+                NOMx_0 = A[index] - A[j2*dimX + i]; /* x- */
+                /*NOMy_0 = A[(i)*dimY + j] - A[(i)*dimY + j2]; */  /* y- */
+                
+                denom1 = NOMy_1*NOMy_1;
+                denom2 = 0.5f*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
+                denom2 = denom2*denom2;
+                T2 = sqrtf(denom1 + denom2 + EPS_ROF);
+                D2[index] = NOMy_1/T2;
+            }}
+    }
+    return *D2;
+}
+
+/* D3_func_ROF: stores in D3 the forward difference NOMz_1 of A along k divided by T3, the square root of summed squared differences plus EPS_ROF; returns D3[0]. */
+float D3_func_ROF(float *A, float *D3, long dimX, long dimY, long dimZ)
+{
+    float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
+    long index,i,j,k,i1,i2,k1,j1,j2,k2;
+    /* unlike D1_func_ROF and D2_func_ROF there is no separate dimZ == 1 branch here */
+#pragma omp parallel for shared (A, D3, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, k2,  NOMx_1, NOMy_1, NOMy_0, NOMx_0, NOMz_1, denom1, denom2, denom3, T3)
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
+            for(k=0; k<dimZ; k++) {
+				index = (dimX*dimY)*k + j*dimX+i;
+                /* symmetric boundary conditions (Neumann): an out-of-range neighbour index is reflected back inside */
+                i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+                i2 = i - 1; if (i2 < 0) i2 = i+1;
+                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+                j2 = j - 1; if (j2 < 0) j2 = j+1;
+                k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+                k2 = k - 1; if (k2 < 0) k2 = k+1;
+                /* k2 is computed above but not read in this function */
+                /* Forward-backward differences; NOTE(review): the "x" differences step along j and the "y" ones along i */
+                NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
+                NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
+                NOMy_0 = A[index] - A[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */
+                NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+                NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
+                /*NOMz_0 = A[(dimX*dimY)*k + (i)*dimY + j] - A[(dimX*dimY)*k2 + (i)*dimY + j]; */ /* z- */
+                /* denom2/denom3: squared minmod, 0.5*(sign(a)+sign(b))*min(|a|,|b|), of the forward and backward differences */
+                denom1 = NOMz_1*NOMz_1;
+                denom2 = 0.5f*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
+                denom2 = denom2*denom2;
+                denom3 = 0.5f*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
+                denom3 = denom3*denom3;
+                T3 = sqrtf(denom1 + denom2 + denom3 + EPS_ROF);
+                D3[index] = NOMz_1/T3;
+            }}}
+    return *D3;
+}
+
+/*************************************************************************/
+/**********************ROF-LLT-related functions *************************/
+/*************************************************************************/
+
+float Update2D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D1_ROF, float *D2_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ)
+{
+    /* One explicit step of the joint model, applied to U in place:
+     *   U += tau*(2*lambdaROF*div - lambdaLLT*lap - (U - U0))
+     * where lap is built from second differences of D1_LLT/D2_LLT and div from backward differences
+     * of D1_ROF/D2_ROF. dimZ is not read. Returns U[0]. */
+    long i, j;
+#pragma omp parallel for shared(U,U0) private(i, j)
+    for (i = 0; i<dimX; i++) {
+        for (j = 0; j<dimY; j++) {
+            const long pos = j*dimX + i;
+            /* symmetric boundary conditions (Neumann): an out-of-range neighbour index is reflected */
+            const long iNext = (i + 1 == dimX) ? i - 1 : i + 1;
+            const long iPrev = (i - 1 < 0) ? i + 1 : i - 1;
+            const long jNext = (j + 1 == dimY) ? j - 1 : j + 1;
+            const long jPrev = (j - 1 < 0) ? j + 1 : j - 1;
+            /* LLT-related part: Laplacian of the normalised second derivatives */
+            const float dxx = D1_LLT[j*dimX+iNext] - 2.0f*D1_LLT[pos] + D1_LLT[j*dimX+iPrev];
+            const float dyy = D2_LLT[jNext*dimX+i] - 2.0f*D2_LLT[pos] + D2_LLT[jPrev*dimX+i];
+            const float lap = dxx + dyy;
+            /* ROF-related part: divergence from backward differences */
+            const float dv1 = D1_ROF[pos] - D1_ROF[jPrev*dimX + i];
+            const float dv2 = D2_ROF[pos] - D2_ROF[j*dimX + iPrev];
+            const float divg = dv1 + dv2;
+            /* combine all into one cost function to minimise */
+            U[pos] += tau*(2.0f*lambdaROF*(divg) - lambdaLLT*(lap) - (U[pos] - U0[pos]));
+        }
+    }
+    return *U;
+}
+
+float Update3D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D3_LLT, float *D1_ROF, float *D2_ROF, float *D3_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ)
+{
+    /* One explicit step of the joint model, applied to U in place:
+     *   U += tau*(2*lambdaROF*div - lambdaLLT*lap - (U - U0))
+     * with lap from second differences of D*_LLT and div from backward differences of D*_ROF. Returns U[0]. */
+    long i, j, k;
+    const long sliceLen = dimX*dimY; /* elements per z-slice */
+#pragma omp parallel for shared(U,U0) private(i, j, k)
+    for (i = 0; i<dimX; i++) {
+        for (j = 0; j<dimY; j++) {
+            for (k = 0; k<dimZ; k++) {
+                /* symmetric boundary conditions (Neumann): an out-of-range neighbour index is reflected */
+                const long iNext = (i + 1 == dimX) ? i - 1 : i + 1;
+                const long iPrev = (i - 1 < 0) ? i + 1 : i - 1;
+                const long jNext = (j + 1 == dimY) ? j - 1 : j + 1;
+                const long jPrev = (j - 1 < 0) ? j + 1 : j - 1;
+                const long kNext = (k + 1 == dimZ) ? k - 1 : k + 1;
+                const long kPrev = (k - 1 < 0) ? k + 1 : k - 1;
+                const long row = sliceLen*k + j*dimX; /* offset of the current row */
+                const long pos = row + i;
+                /* LLT-related part: Laplacian of the normalised second derivatives */
+                const float dxx = D1_LLT[row + iNext] - 2.0f*D1_LLT[pos] + D1_LLT[row + iPrev];
+                const float dyy = D2_LLT[sliceLen*k + jNext*dimX+i] - 2.0f*D2_LLT[pos] + D2_LLT[sliceLen*k + jPrev*dimX+i];
+                const float dzz = D3_LLT[sliceLen*kNext + j*dimX+i] - 2.0f*D3_LLT[pos] + D3_LLT[sliceLen*kPrev + j*dimX+i];
+                const float lap = dxx + dyy + dzz;
+                /* ROF-related part: divergence from backward differences */
+                const float dv1 = D1_ROF[pos] - D1_ROF[sliceLen*k + jPrev*dimX+i];
+                const float dv2 = D2_ROF[pos] - D2_ROF[row + iPrev];
+                const float dv3 = D3_ROF[pos] - D3_ROF[sliceLen*kPrev + j*dimX+i];
+                const float divg = dv1 + dv2 + dv3;
+                /* combine all into one cost function to minimise */
+                U[pos] += tau*(2.0f*lambdaROF*(divg) - lambdaLLT*(lap) - (U[pos] - U0[pos]));
+            }
+        }
+    }
+    return *U;
+}
+
diff --git a/src/Core/regularisers_CPU/LLT_ROF_core.h b/src/Core/regularisers_CPU/LLT_ROF_core.h
new file mode 100644
index 0000000..8e6591e
--- /dev/null
+++ b/src/Core/regularisers_CPU/LLT_ROF_core.h
@@ -0,0 +1,65 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
+ * 
+* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well. 
+* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase 
+* lambdaLLT starting with smaller values. 
+*
+* Input Parameters:
+* 1. U0 - original noisy image/volume
+* 2. lambdaROF - ROF-related regularisation parameter
+* 3. lambdaLLT - LLT-related regularisation parameter
+* 4. tau - time-marching step 
+* 5. iter - iterations number (for both models)
+*
+* Output:
+* Filtered/regularised image
+*
+* References: 
+* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
+* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float LLT_ROF_CPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
+
+CCPI_EXPORT float der2D_LLT(float *U, float *D1, float *D2, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float der3D_LLT(float *U, float *D1, float *D2, float *D3, long dimX, long dimY, long dimZ);
+
+CCPI_EXPORT float D1_func_ROF(float *A, float *D1, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float D2_func_ROF(float *A, float *D2, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float D3_func_ROF(float *A, float *D3, long dimX, long dimY, long dimZ);
+
+CCPI_EXPORT float Update2D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D1_ROF, float *D2_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float Update3D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D3_LLT, float *D1_ROF, float *D2_ROF, float *D3_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/Nonlocal_TV_core.c b/src/Core/regularisers_CPU/Nonlocal_TV_core.c
new file mode 100644
index 0000000..c4c9118
--- /dev/null
+++ b/src/Core/regularisers_CPU/Nonlocal_TV_core.c
@@ -0,0 +1,173 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd. 
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Nonlocal_TV_core.h"
+
+/* C-OMP implementation of non-local regulariser
+ * Weights and associated indices must be given as an input.
+ * Gauss-Seidel fixed point iteration requires ~ 3 iterations, so the main effort
+ * goes in pre-calculation of weights and selection of patches
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. AR_i - indices of i neighbours
+ * 3. AR_j - indices of j neighbours
+ * 4. AR_k - indices of k neighbours (0 - for 2D case)
+ * 5. Weights_ij(k) - associated weights 
+ * 6. regularisation parameter
+ * 7. iterations number 
+ 
+ * Output:
+ * 1. denoised image/volume 	
+ * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
+ 
+ */
+/*****************************************************************************/
+
+float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb)
+{
+    /* Copies A_orig into Output, then runs IterNumb in-place sweeps of the non-local TV update over every pixel/voxel of Output. */
+    long i, j, k;
+    int iter;
+    lambdaReg = 1.0f/lambdaReg; /* the per-pixel updates receive the reciprocal of the caller's parameter */
+         
+    /*****2D INPUT *****/
+    if (dimZ == 0) { /* dimZ == 0 selects the 2D path */
+	  copyIm(A_orig, Output, (long)(dimX), (long)(dimY), 1l);
+    /* each sweep updates Output in place; NOTE(review): threads read neighbour values that other threads may be writing, so results can depend on scheduling */
+     for(iter=0; iter<IterNumb; iter++) {    
+#pragma omp parallel for shared (A_orig, Output, Weights, H_i, H_j, iter) private(i,j)
+      for(i=0; i<(long)(dimX); i++) {
+            for(j=0; j<(long)(dimY); j++) {              
+             /*NLM_H1_2D(Output, A_orig, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg);*/  /* NLM - H1 penalty */
+             NLM_TV_2D(Output, A_orig, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), NumNeighb, lambdaReg);  /* NLM - TV penalty */
+           }}
+          }
+    }  
+    else {
+     /*****3D INPUT *****/
+        copyIm(A_orig, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
+    /* same in-place sweep as the 2D branch, with H_k supplying the third neighbour coordinate */
+     for(iter=0; iter<IterNumb; iter++) {    
+#pragma omp parallel for shared (A_orig, Output, Weights, H_i, H_j, H_k, iter) private(i,j,k)
+      for(i=0; i<(long)(dimX); i++) {
+            for(j=0; j<(long)(dimY); j++) {              
+               for(k=0; k<(long)(dimZ); k++) {
+            /* NLM_H1_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, dimX, dimY, dimZ, NumNeighb, lambdaReg); */ /* NLM - H1 penalty */
+            NLM_TV_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, (long)(dimX), (long)(dimY), (long)(dimZ), NumNeighb, lambdaReg);   /* NLM - TV penalty */     
+           }}}          
+          }          
+    }
+    return *Output; /* first element of Output only; the full result is in the Output buffer */
+}
+
+/***********<<<<Main Function for NLM - H1 penalty>>>>**********/
+float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg)
+{
+    /* In-place update of pixel (i,j) for the H1 penalty: weighted mean of A_orig and the NumNeighb listed neighbours. */
+    const long centre = j*dimX + i, plane = dimX*dimY;
+    float weighted_sum = 0.0f, weight_total = 0.0f;
+    long n, slot;
+
+    for (n = 0; n < NumNeighb; n++) {
+        slot = plane*n + centre; /* entry n of the neighbour list of pixel (i,j) */
+        weighted_sum += A[(long)H_j[slot]*dimX + (long)H_i[slot]]*Weights[slot];
+        weight_total += Weights[slot];
+    }
+    /* lambdaReg weighs the original value against the neighbour average */
+    A[centre] = (lambdaReg*A_orig[centre] + weighted_sum)/(lambdaReg + weight_total);
+    return *A; /* A[0], not the pixel just updated */
+}
+/*3D version*/
+float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg)
+{
+    /* In-place update of voxel (i,j,k) for the H1 penalty: weighted mean of A_orig and the NumNeighb listed neighbours. */
+    const long slice = dimX*dimY, centre = slice*k + j*dimX + i;
+    float weighted_sum = 0.0f, weight_total = 0.0f;
+    long n, slot;
+
+    for (n = 0; n < NumNeighb; n++) {
+        slot = slice*dimZ*n + centre; /* entry n of the neighbour list of voxel (i,j,k) */
+        weighted_sum += A[slice*(long)H_k[slot] + (long)H_j[slot]*dimX + (long)H_i[slot]]*Weights[slot];
+        weight_total += Weights[slot];
+    }
+    /* lambdaReg weighs the original value against the neighbour average */
+    A[centre] = (lambdaReg*A_orig[centre] + weighted_sum)/(lambdaReg + weight_total);
+    return *A; /* A[0], not the voxel just updated */
+}
+
+
+/***********<<<<Main Function for NLM - TV penalty>>>>**********/
+float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg)
+{
+    /* In-place update of pixel (i,j) for the non-local TV penalty, using its NumNeighb precomputed neighbours. */
+    const long centre = j*dimX + i;   /* flat offset of the pixel being updated */
+    const long plane = dimX*dimY;     /* stride between consecutive neighbour slots */
+    float weighted_sum = 0.0f, weight_total = 0.0f, grad_sq = 0.0f, grad_norm, coeff;
+    long n, slot;
+
+    /* pass 1: weighted squared differences between each neighbour and the centre pixel */
+    for (n = 0; n < NumNeighb; n++) {
+        slot = plane*n + centre;
+        grad_sq += powf((A[(long)H_j[slot]*dimX + (long)H_i[slot]] - A[centre]),2)*Weights[slot];
+    }
+    grad_norm = sqrtf(grad_sq); /* non-local gradient magnitude */
+    coeff = 2.0f*(1.0f/(grad_norm + EPS)); /* EPS keeps the division finite when the magnitude is zero */
+
+    /* pass 2: neighbour values and weights, both scaled by the gradient-dependent coefficient */
+    for (n = 0; n < NumNeighb; n++) {
+        slot = plane*n + centre;
+        weighted_sum += A[(long)H_j[slot]*dimX + (long)H_i[slot]]*coeff*Weights[slot];
+        weight_total += Weights[slot]*coeff;
+    }
+
+    /* lambdaReg weighs the original value against the neighbour average */
+    A[centre] = (lambdaReg*A_orig[centre] + weighted_sum)/(lambdaReg + weight_total);
+    return *A; /* A[0], not the pixel just updated */
+}
+/*3D version*/
+float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg)
+{   /* In-place update of voxel (i,j,k) for the non-local TV penalty, using its NumNeighb precomputed neighbours; returns A[0]. */
+    long x, i1, j1, k1, index;
+    float value = 0.0f, normweight  = 0.0f, NLgrad_magn = 0.0f, NLCoeff;
+    const long centre = (dimX*dimY*k) + j*dimX+i; /* flat offset of the voxel being updated */
+    for(x=0; x < NumNeighb; x++) {
+        index = dimX*dimY*dimZ*x + centre;
+        i1 = H_i[index];
+        j1 = H_j[index];
+        k1 = H_k[index];
+        /* difference is taken against the centre voxel (slice k), matching NLM_TV_2D; the old code used slice k1 here */
+        NLgrad_magn += powf((A[(dimX*dimY*k1) + j1*dimX+i1] - A[centre]),2)*Weights[index];
+    }
+    NLgrad_magn = sqrtf(NLgrad_magn); /*Non Local Gradients Magnitude */
+    NLCoeff = 2.0f*(1.0f/(NLgrad_magn + EPS)); /* EPS keeps the division finite when the magnitude is zero */
+
+    for(x=0; x < NumNeighb; x++) {
+        index = dimX*dimY*dimZ*x + centre;
+        i1 = H_i[index];
+        j1 = H_j[index];
+        k1 = H_k[index];
+        value += A[(dimX*dimY*k1) + j1*dimX+i1]*NLCoeff*Weights[index];
+        normweight += Weights[index]*NLCoeff;
+    }
+    A[centre] = (lambdaReg*A_orig[centre] + value)/(lambdaReg + normweight);
+    return *A;
+}
diff --git a/src/Core/regularisers_CPU/Nonlocal_TV_core.h b/src/Core/regularisers_CPU/Nonlocal_TV_core.h
new file mode 100644
index 0000000..6d55101
--- /dev/null
+++ b/src/Core/regularisers_CPU/Nonlocal_TV_core.h
@@ -0,0 +1,61 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd. 
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+#define EPS 1.0000e-9 /* added to the non-local gradient magnitude before it is used as a divisor */
+
+/* C-OMP implementation of non-local regulariser
+ * Weights and associated indices must be given as an input.
+ * Gauss-Seidel fixed point iteration requires ~ 3 iterations, so the main effort
+ * goes in pre-calculation of weights and selection of patches
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. AR_i - indices of i neighbours
+ * 3. AR_j - indices of j neighbours
+ * 4. AR_k - indices of k neighbours (0 - for 2D case)
+ * 5. Weights_ij(k) - associated weights 
+ * 6. regularisation parameter
+ * 7. iterations number 
+ 
+ * Output:
+ * 1. denoised image/volume 	
+ * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans.   Image Processing 17, no. 7 (2008): 1047-1060. 
+ */
+ 
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb); /* driver: IterNumb sweeps over Output; dimZ == 0 selects 2D */
+CCPI_EXPORT float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg); /* update of one pixel, H1 penalty */
+CCPI_EXPORT float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg); /* update of one pixel, TV penalty */
+CCPI_EXPORT float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg); /* update of one voxel, H1 penalty */
+CCPI_EXPORT float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg); /* update of one voxel, TV penalty */
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/PatchSelect_core.c b/src/Core/regularisers_CPU/PatchSelect_core.c
new file mode 100644
index 0000000..cf5cdc7
--- /dev/null
+++ b/src/Core/regularisers_CPU/PatchSelect_core.c
@@ -0,0 +1,345 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd. 
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PatchSelect_core.h"
+
+/* C-OMP implementation of non-local weight pre-calculation for non-local priors
+ * Weights and associated indices are stored into pre-allocated arrays and passed
+ * to the regulariser
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
+ * 3. Similarity window (half-size of the patch window, e.g. 2)
+ * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
+ * 5. noise-related parameter to calculate non-local weights
+ *
+ * Output [2D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. Weights_ij - associated weights
+ *
+ * Output [3D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. AR_k - indices of k neighbours
+ * 4. Weights_ijk - associated weights
+ */
+
+void swap(float *xp, float *yp)
+{
+    const float held = *yp; /* scratch copy so the two floats can be exchanged in place */
+    *yp = *xp;
+    *xp = held;
+}
+
+void swapUS(unsigned short *xp, unsigned short *yp)
+{
+    const unsigned short held = *yp; /* scratch copy so the two values can be exchanged in place */
+    *yp = *xp;
+    *xp = held;
+}
+/**************************************************/
+
+float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM)
+{ /* Builds the patch-weighting kernel, then fills H_i/H_j/(H_k)/Weights for every pixel or voxel of A. */
+    int counterG;
+    long i, j, k;
+    float *Eucl_Vec, h2;
+    h2 = h*h;   /* the per-pixel routines compute weights as expf(-distance/h2) */
+    /****************2D INPUT ***************/
+    if (dimZ == 0) { /* dimZ == 0 selects the 2D path */
+        /* generate a 2D Gaussian kernel for NLM procedure: exp(-(i^2+j^2)/(2*SimilarWin^2)) over the similarity window */
+        Eucl_Vec = (float*) calloc ((2*SimilarWin+1)*(2*SimilarWin+1),sizeof(float));
+        counterG = 0;
+        for(i=-SimilarWin; i<=SimilarWin; i++) {
+            for(j=-SimilarWin; j<=SimilarWin; j++) {
+                Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2))/(2*SimilarWin*SimilarWin));
+                counterG++;
+            }} /*main neighb loop */
+        /* for each pixel store indices of the most similar neighbours (patches) */
+        if (switchM == 1) { /* switchM == 1: Indeces2D_p, which addresses the image as i*dimY + j */
+#pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j)
+    for(i=0; i<(long)(dimX); i++) {
+          for(j=0; j<(long)(dimY); j++) {
+                Indeces2D_p(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
+            }}
+        }
+        else { /* otherwise: Indeces2D, which addresses the image as j*dimX + i */
+#pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j)
+    for(i=0; i<(long)(dimX); i++) {
+          for(j=0; j<(long)(dimY); j++) {
+                Indeces2D(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
+            }}
+            }
+    }
+    else {
+    /****************3D INPUT ***************/       
+        /* generate a 3D Gaussian kernel for NLM procedure; NOTE(review): the divisor is 2*SimilarWin^3 while the 2D kernel uses 2*SimilarWin^2 - confirm this is intended */
+        Eucl_Vec = (float*) calloc ((2*SimilarWin+1)*(2*SimilarWin+1)*(2*SimilarWin+1),sizeof(float));
+        counterG = 0;
+        for(i=-SimilarWin; i<=SimilarWin; i++) {
+            for(j=-SimilarWin; j<=SimilarWin; j++) {
+                for(k=-SimilarWin; k<=SimilarWin; k++) {
+                    Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2) + pow(((float) k), 2))/(2*SimilarWin*SimilarWin*SimilarWin));
+                    counterG++;
+                }}} /*main neighb loop */     
+        
+        /* for each voxel store indices of the most similar neighbours (patches) */
+        if (switchM == 1) { /* switchM == 1: same routine, called with i and j swapped */
+#pragma omp parallel for shared (A, Weights, H_i, H_j, H_k) private(i,j,k)
+        for(i=0; i<dimX; i++) {
+            for(j=0; j<dimY; j++) {
+                for(k=0; k<dimZ; k++) {
+                    Indeces3D(A, H_i, H_j, H_k, Weights, j, i, (k), (dimX), (dimY), (dimZ), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2); /* NOTE(review): only i/j are swapped, not the dimensions - verify for non-square volumes */
+                }}}
+        }
+        else {
+#pragma omp parallel for shared (A, Weights, H_i, H_j, H_k) private(i,j,k)
+        for(i=0; i<dimX; i++) {
+            for(j=0; j<dimY; j++) {
+                for(k=0; k<dimZ; k++) {
+                    Indeces3D(A, H_i, H_j, H_k, Weights, (i), (j), (k), (dimX), (dimY), (dimZ), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2); /* NOTE(review): Indeces3D declares dimY before dimX, so these land in swapped parameters */
+                }}}
+            }
+    }
+    free(Eucl_Vec);
+    return 1; /* always 1; the results are in H_i/H_j/H_k/Weights */
+}
+
+float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
+{
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG;
+    float *Weight_Vec, normsum;
+    unsigned short *ind_i, *ind_j;
+    /* For pixel (i,j): score every candidate of the search window by patch similarity and store the NumNeighb best in H_i/H_j/Weights. */
+    sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1);
+    /* zero-initialised scratch buffers, one slot per search-window candidate */
+    Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
+    ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY))) {
+                            if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY))) {
+                                normsum += Eucl_Vec[counterG]*pow(Aorig[j3*dimX + (i3)] - Aorig[j2*dimX + (i2)], 2);
+                                counterG++; /* advances only for in-bounds pairs; NOTE(review): near borders the kernel entry no longer matches offset (i_c,j_c) */
+                            }}
+                        
+                    }}
+                /* writing temporarily into vectors; candidates whose distance is not above EPS (the pixel itself included) are dropped */
+                if (normsum > EPS) {                    
+                    Weight_Vec[counter] = expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;                    
+                    counter++;
+                }
+            }
+        }}     
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly (bubble sort over the first `counter` slots) */
+    for (x = 0; x < counter-1; x++)  {
+       for (y = 0; y < counter-x-1; y++)  {
+           if (Weight_Vec[y] < Weight_Vec[y+1]) {
+            swap(&Weight_Vec[y], &Weight_Vec[y+1]); 		
+            swapUS(&ind_i[y], &ind_i[y+1]);
+            swapUS(&ind_j[y], &ind_j[y+1]);  
+            }
+    	}
+    }
+     /*sorting loop finished*/      
+    /*now select the NumNeighb most prominent weights; NOTE(review): assumes NumNeighb <= sizeWin_tot, slots past `counter` hold the calloc zeros */ 
+    for(x=0; x < NumNeighb; x++) {
+        index = (dimX*dimY*x) + j*dimX+i;        /* neighbour x of pixel (i,j) is stored in plane x of the output arrays */
+        H_i[index] = ind_i[x];
+        H_j[index] = ind_j[x];
+        Weights[index] = Weight_Vec[x];
+    }    
+    free(ind_i);
+    free(ind_j);
+    free(Weight_Vec);
+    return 1; /* always 1 */
+}
+float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
+{
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG;
+    float *Weight_Vec, normsum;
+    unsigned short *ind_i, *ind_j;
+    /* Same selection as Indeces2D, but the image and the output arrays are addressed as i*dimY + j. */
+    sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1);
+    /* zero-initialised scratch buffers, one slot per search-window candidate */
+    Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
+    ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY))) {
+                            if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY))) {
+                                /* unlike Indeces2D (j*dimX + i), pixels are read at i*dimY + j here */
+                                normsum += Eucl_Vec[counterG]*pow(Aorig[i3*dimY + (j3)] - Aorig[i2*dimY + (j2)], 2);
+                                counterG++; /* advances only for in-bounds pairs; NOTE(review): near borders the kernel entry no longer matches offset (i_c,j_c) */
+                            }}
+                        
+                    }}
+                /* writing temporarily into vectors; candidates whose distance is not above EPS (the pixel itself included) are dropped */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+       /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly (bubble sort over the first `counter` slots) */
+    for (x = 0; x < counter-1; x++)  {
+       for (y = 0; y < counter-x-1; y++)  {
+           if (Weight_Vec[y] < Weight_Vec[y+1]) {
+            swap(&Weight_Vec[y], &Weight_Vec[y+1]); 		
+            swapUS(&ind_i[y], &ind_i[y+1]);
+            swapUS(&ind_j[y], &ind_j[y+1]);  
+            }
+    	}
+    }
+    /*sorting loop finished*/
+    
+    /*now select the NumNeighb most prominent weights; NOTE(review): assumes NumNeighb <= sizeWin_tot, slots past `counter` hold the calloc zeros */ 
+    for(x=0; x < NumNeighb; x++) {
+        index = (dimX*dimY*x) + i*dimY+j;       /* neighbour x of pixel (i,j) is stored in plane x of the output arrays */
+        H_i[index] = ind_i[x];
+        H_j[index] = ind_j[x];
+        Weights[index] = Weight_Vec[x];
+    }   
+    free(ind_i);
+    free(ind_j);
+    free(Weight_Vec);
+    return 1; /* always 1 */
+}
+
+float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimY, long dimX, long dimZ, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
+{
+    long i1, j1, k1, i_m, j_m, k_m, i_c, j_c, k_c, i2, j2, k2, i3, j3, k3, counter, x, y, index, sizeWin_tot, counterG;
+    float *Weight_Vec, normsum, temp;
+    unsigned short *ind_i, *ind_j, *ind_k, temp_i, temp_j, temp_k;
+    /* For voxel (i,j,k): score every candidate of the search window by patch similarity and store the NumNeighb best in H_i/H_j/H_k/Weights. */
+    sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1)*(2*SearchWindow + 1);
+    /* zero-initialised scratch buffers, one slot per candidate; NOTE(review): dimY precedes dimX in the signature, and NumNeighb is assumed <= sizeWin_tot */
+    Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
+    ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    ind_k = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    
+    counter = 0l;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            for(k_m=-SearchWindow; k_m<=SearchWindow; k_m++) {
+                k1 = k+k_m;
+                i1 = i+i_m;
+                j1 = j+j_m;
+                if (((i1 >= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY)) && ((k1 >= 0) && (k1 < dimZ))) {
+                    normsum = 0.0f; counterG = 0l;
+                    for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                        for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                            for(k_c=-SimilarWin; k_c<=SimilarWin; k_c++) {
+                                i2 = i1 + i_c;
+                                j2 = j1 + j_c;
+                                k2 = k1 + k_c;
+                                i3 = i + i_c;
+                                j3 = j + j_c;
+                                k3 = k + k_c;
+                                if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY)) && ((k2 >= 0) && (k2 < dimZ))) {
+                                    if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY)) && ((k3 >= 0) && (k3 < dimZ))) {
+                                        normsum += Eucl_Vec[counterG]*pow(Aorig[(dimX*dimY*k3) + j3*dimX + (i3)] - Aorig[(dimX*dimY*k2) + j2*dimX + (i2)], 2);
+                                        counterG++;
+                                    }}
+                            }}}
+                    /* writing temporarily into vectors; candidates whose distance is not above EPS (the voxel itself included) are dropped */
+                    if (normsum > EPS) {
+                        Weight_Vec[counter] = expf(-normsum/h2);
+                        ind_i[counter] = i1;
+                        ind_j[counter] = j1;
+                        ind_k[counter] = k1;
+                        counter ++;
+                    }
+                }
+            }}}
+    /* bubble sort the first `counter` candidates by weight [HIGH to LOW], keeping the index arrays in step */
+    /* (adjacent compare-and-swap as in the 2D routines; the old loop compared against slot x and could push entries past `counter`) */
+    for (x = 0; x < counter-1; x++)  {
+        for (y = 0; y < counter-x-1; y++)  {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                temp = Weight_Vec[y+1];
+                temp_i = ind_i[y+1];
+                temp_j = ind_j[y+1];
+                temp_k = ind_k[y+1];
+                Weight_Vec[y+1] = Weight_Vec[y];
+                Weight_Vec[y] = temp;
+                ind_i[y+1] = ind_i[y];
+                ind_i[y] = temp_i;
+                ind_j[y+1] = ind_j[y];
+                ind_j[y] = temp_j;
+                ind_k[y+1] = ind_k[y];
+                ind_k[y] = temp_k;
+            }}}
+    /*sorting loop finished*/
+    
+    /*now select the NumNeighb most prominent weights and store into arrays; slots past `counter` hold the calloc zeros */
+    for(x=0; x < NumNeighb; x++) {
+        index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
+        
+        H_i[index] = ind_i[x];
+        H_j[index] = ind_j[x];
+        H_k[index] = ind_k[x];
+        
+        Weights[index] = Weight_Vec[x];
+    }
+    
+    free(ind_i);
+    free(ind_j);
+    free(ind_k);
+    free(Weight_Vec);
+    return 1;
+}
+
diff --git a/src/Core/regularisers_CPU/PatchSelect_core.h b/src/Core/regularisers_CPU/PatchSelect_core.h
new file mode 100644
index 0000000..ddaa428
--- /dev/null
+++ b/src/Core/regularisers_CPU/PatchSelect_core.h
@@ -0,0 +1,63 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd. 
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+#define EPS 1.0000e-12 /* patch distances not above this threshold are discarded by the Indeces* routines */
+
+/* C-OMP implementation of non-local weight pre-calculation for non-local priors
+ * Weights and associated indices are stored into pre-allocated arrays and passed
+ * to the regulariser
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
+ * 3. Similarity window (half-size of the patch window, e.g. 2)
+ * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
+ * 5. noise-related parameter to calculate non-local weights
+ *
+ * Output [2D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. Weights_ij - associated weights
+ *
+ * Output [3D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. AR_k - indices of k neighbours
+ * 4. Weights_ijk - associated weights
+ */
+/*****************************************************************************/
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM); /* driver: dimZ == 0 selects 2D; switchM == 1 selects the alternative index order */
+CCPI_EXPORT float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2); /* neighbour selection for one pixel, image addressed as j*dimX + i */
+CCPI_EXPORT float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2); /* neighbour selection for one pixel, image addressed as i*dimY + j */
+CCPI_EXPORT float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimY, long dimX, long dimZ, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2); /* neighbour selection for one voxel; note dimY is declared before dimX */
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/ROF_TV_core.c b/src/Core/regularisers_CPU/ROF_TV_core.c
new file mode 100644
index 0000000..1858442
--- /dev/null
+++ b/src/Core/regularisers_CPU/ROF_TV_core.c
@@ -0,0 +1,289 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ROF_TV_core.h"
+
+#define EPS 1.0e-12
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+/* Signum of x: 1 when x > 0, -1 when x < 0 and 0 otherwise (including NaN, for which both comparisons are false). */
+int sign(float x) {
+    return (x > 0) ? 1 : ((x < 0) ? -1 : 0);
+}
+
+
+/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
+ *
+ * 
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Number of iterations, for explicit scheme >= 150 is recommended  [REQUIRED]
+ * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
+ *
+ * Output:
+ * [1] Regularized image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+ */
+
+/* Runs iterationsNumb explicit ROF-TV steps starting from Input and leaves the result in Output (both hold dimX*dimY*dimZ floats; the 2D kernels are used unless dimZ > 1). Returns Output[0], or 0 when the work buffers cannot be allocated, in which case Output holds an unmodified copy of Input. */
+float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ)
+{
+    float *D1, *D2, *D3;
+    int i;
+    long DimTotal;
+    /* widen every factor before multiplying: the product of three ints can overflow int for large volumes */
+    DimTotal = (long)(dimX)*(long)(dimY)*(long)(dimZ);
+
+    /* copy into output: the iterations start from the noisy data */
+    copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
+    /* zero-initialised work buffers for the normalised differences along each axis */
+    D1 = calloc(DimTotal, sizeof(float));
+    D2 = calloc(DimTotal, sizeof(float));
+    D3 = calloc(DimTotal, sizeof(float));
+    /* free(NULL) is a no-op, so a partial allocation is released safely */
+    if ((D1 == NULL) || (D2 == NULL) || (D3 == NULL)) { free(D1); free(D2); free(D3); return 0.0f; }
+    for(i=0; i < iterationsNumb; i++) {
+        D1_func(Output, D1, (long)(dimX), (long)(dimY), (long)(dimZ));
+        D2_func(Output, D2, (long)(dimX), (long)(dimY), (long)(dimZ));
+        if (dimZ > 1) D3_func(Output, D3, (long)(dimX), (long)(dimY), (long)(dimZ)); /* D3 is only read by the 3D branch of TV_kernel */
+        TV_kernel(D1, D2, D3, Output, Input, lambdaPar, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
+    }
+    free(D1); free(D2); free(D3);
+    return *Output;
+}
+
+/* D1_func: for every element stores in D1 the forward difference of A along j (stride dimX) divided by sqrt(that difference squared + squared minmod differences along the other axes + EPS). Uses the 2D formula unless dimZ > 1. Returns D1[0]. */
+float D1_func(float *A, float *D1, long dimX, long dimY, long dimZ)
+{
+    float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
+    long i,j,k,i1,i2,k1,j1,k2,index;
+
+    if (dimZ > 1) {
+#pragma omp parallel for shared (A, D1, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, k2, NOMx_1,NOMy_1,NOMy_0,NOMz_1,NOMz_0,denom1,denom2,denom3,T1)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                for(k=0; k<dimZ; k++) {
+                    index = (dimX*dimY)*k + j*dimX+i;
+                    /* symmetric boundary conditions (Neumann) */
+                    i1 = i + 1; if (i1 >= dimX) i1 = (i > 0) ? i-1 : i;
+                    i2 = i - 1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+                    j1 = j + 1; if (j1 >= dimY) j1 = (j > 0) ? j-1 : j;
+                    /* an axis of length 1 reflects onto itself above; dimZ > 1 in this branch, so k-1 and k+1 below stay in range */
+                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+                    k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+                    /* Forward-backward differences */
+                    NOMx_1 = A[(dimX*dimY)*k + j1*dimX + i] - A[index]; /* forward along j */
+                    NOMy_1 = A[(dimX*dimY)*k + j*dimX + i1] - A[index]; /* forward along i */
+                    /* the backward difference along j is not used by this component */
+                    NOMy_0 = A[index] - A[(dimX*dimY)*k + j*dimX + i2]; /* backward along i */
+
+                    NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* forward along k */
+                    NOMz_0 = A[index] - A[(dimX*dimY)*k2 + j*dimX + i]; /* backward along k */
+
+                    /* squared gradient terms; the transverse axes contribute the minmod of their one-sided differences */
+                    denom1 = NOMx_1*NOMx_1;
+                    denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabsf(NOMy_1),fabsf(NOMy_0)));
+                    denom2 = denom2*denom2;
+                    denom3 = 0.5f*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabsf(NOMz_1),fabsf(NOMz_0)));
+                    denom3 = denom3*denom3;
+                    T1 = sqrtf(denom1 + denom2 + denom3 + EPS); /* single precision, consistent with the 2D branch */
+                    D1[index] = NOMx_1/T1;
+                }}}
+    }
+    else {
+#pragma omp parallel for shared (A, D1, dimX, dimY) private(i, j, i1, j1, i2,NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1,index)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                index = j*dimX+i;
+                /* symmetric boundary conditions (Neumann) */
+                i1 = i + 1; if (i1 >= dimX) i1 = (i > 0) ? i-1 : i;
+                i2 = i - 1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+                j1 = j + 1; if (j1 >= dimY) j1 = (j > 0) ? j-1 : j;
+                /* an axis of length 1 reflects onto itself, keeping every index inside the array */
+
+                /* Forward-backward differences */
+                NOMx_1 = A[j1*dimX + i] - A[index]; /* forward along j */
+                NOMy_1 = A[j*dimX + i1] - A[index]; /* forward along i */
+                /* the backward difference along j is not used by this component */
+                NOMy_0 = A[index] - A[(j)*dimX + i2]; /* backward along i */
+
+                denom1 = NOMx_1*NOMx_1;
+                denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabsf(NOMy_1),fabsf(NOMy_0)));
+                denom2 = denom2*denom2;
+                T1 = sqrtf(denom1 + denom2 + EPS);
+                D1[index] = NOMx_1/T1;
+            }}
+    }
+    return *D1;
+}
+/* D2_func: for every element stores in D2 the forward difference of A along i (unit stride) divided by sqrt(that difference squared + squared minmod differences along the other axes + EPS). Uses the 2D formula unless dimZ > 1. Returns D2[0]. */
+float D2_func(float *A, float *D2, long dimX, long dimY, long dimZ)
+{
+    float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
+    long i,j,k,i1,k1,j1,j2,k2,index;
+
+    if (dimZ > 1) {
+#pragma omp parallel for shared (A, D2, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, j2, k2,  NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                for(k=0; k<dimZ; k++) {
+                    index = (dimX*dimY)*k + j*dimX+i;
+                    /* symmetric boundary conditions (Neumann) */
+                    i1 = i + 1; if (i1 >= dimX) i1 = (i > 0) ? i-1 : i;
+                    j1 = j + 1; if (j1 >= dimY) j1 = (j > 0) ? j-1 : j;
+                    j2 = j - 1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+                    /* an axis of length 1 reflects onto itself above; dimZ > 1 in this branch, so k-1 and k+1 below stay in range */
+                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+                    k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+                    /* Forward-backward differences */
+                    NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* forward along j */
+                    NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* forward along i */
+                    NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* backward along j */
+                    NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* forward along k */
+                    NOMz_0 = A[index] - A[(dimX*dimY)*k2 + (j)*dimX + i]; /* backward along k */
+
+                    /* squared gradient terms; the transverse axes contribute the minmod of their one-sided differences */
+                    denom1 = NOMy_1*NOMy_1;
+                    denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabsf(NOMx_1),fabsf(NOMx_0)));
+                    denom2 = denom2*denom2;
+                    denom3 = 0.5f*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabsf(NOMz_1),fabsf(NOMz_0)));
+                    denom3 = denom3*denom3;
+                    T2 = sqrtf(denom1 + denom2 + denom3 + EPS);
+                    D2[index] = NOMy_1/T2;
+                }}}
+    }
+    else {
+#pragma omp parallel for shared (A, D2, dimX, dimY) private(i, j, i1, j1, j2, NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2,index)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                index = j*dimX+i;
+                /* symmetric boundary conditions (Neumann) */
+                i1 = i + 1; if (i1 >= dimX) i1 = (i > 0) ? i-1 : i;
+                j1 = j + 1; if (j1 >= dimY) j1 = (j > 0) ? j-1 : j;
+                j2 = j - 1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+                /* an axis of length 1 reflects onto itself, keeping every index inside the array */
+
+                /* Forward-backward differences */
+                NOMx_1 = A[j1*dimX + i] - A[index]; /* forward along j */
+                NOMy_1 = A[j*dimX + i1] - A[index]; /* forward along i */
+                NOMx_0 = A[index] - A[j2*dimX + i]; /* backward along j */
+                /* the backward difference along i is not used by this component */
+
+                denom1 = NOMy_1*NOMy_1;
+                denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabsf(NOMx_1),fabsf(NOMx_0)));
+                denom2 = denom2*denom2;
+                T2 = sqrtf(denom1 + denom2 + EPS);
+                D2[index] = NOMy_1/T2;
+            }}
+    }
+    return *D2;
+}
+
+/* D3_func: for every element stores in D3 the forward difference of A along k (stride dimX*dimY) divided by sqrt(that difference squared + squared minmod differences along i and j + EPS). Returns D3[0]. */
+float D3_func(float *A, float *D3, long dimX, long dimY, long dimZ)
+{
+    float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
+    long index,i,j,k,i1,i2,k1,j1,j2;
+
+#pragma omp parallel for shared (A, D3, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2,  NOMx_1, NOMy_1, NOMy_0, NOMx_0, NOMz_1, denom1, denom2, denom3, T3)
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX+i;
+                /* symmetric boundary conditions (Neumann); an axis of length 1 reflects onto itself so no index leaves the array */
+                i1 = i + 1; if (i1 >= dimX) i1 = (i > 0) ? i-1 : i;
+                i2 = i - 1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+                j1 = j + 1; if (j1 >= dimY) j1 = (j > 0) ? j-1 : j;
+                j2 = j - 1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+                k1 = k + 1; if (k1 >= dimZ) k1 = (k > 0) ? k-1 : k;
+                /* slice k-1 is never read: only the forward difference along k is formed below */
+
+                /* Forward-backward differences */
+                NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* forward along j */
+                NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* forward along i */
+                NOMy_0 = A[index] - A[(dimX*dimY)*k + (j)*dimX + i2]; /* backward along i */
+                NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* backward along j */
+                NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* forward along k */
+
+                /* squared gradient terms; the i and j axes contribute the minmod of their one-sided differences */
+                denom1 = NOMz_1*NOMz_1;
+                denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabsf(NOMx_1),fabsf(NOMx_0)));
+                denom2 = denom2*denom2;
+                denom3 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabsf(NOMy_1),fabsf(NOMy_0)));
+                denom3 = denom3*denom3;
+                T3 = sqrtf(denom1 + denom2 + denom3 + EPS);
+                D3[index] = NOMz_1/T3;
+            }}}
+    return *D3;
+}
+
+/* TV_kernel: explicit update step. Forms the divergence of (D1, D2[, D3]) from backward differences and applies B += tau*(2*lambda*div - (B - A)) in place. D3 is only read when dimZ > 1. Returns B[0]. */
+float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, long dimX, long dimY, long dimZ)
+{
+    float dv1, dv2, dv3;
+    long index,i,j,k,i2,j2,k2;
+
+    if (dimZ > 1) {
+#pragma omp parallel for shared (D1, D2, D3, B, dimX, dimY, dimZ) private(index, i, j, k, i2, j2, k2, dv1,dv2,dv3)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                for(k=0; k<dimZ; k++) {
+                    index = (dimX*dimY)*k + j*dimX+i;
+                    /* symmetric boundary conditions (Neumann): only the lower
+                     * neighbours are read; an axis of length 1 reflects onto
+                     * itself so that no index leaves the arrays */
+                    i2 = i - 1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+                    j2 = j - 1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+                    /* dimZ > 1 in this branch, so k+1 is always a valid slice */
+                    k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+                    /* divergence components */
+                    dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i];
+                    dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
+                    dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
+
+                    B[index] += tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (B[index] - A[index]));
+                }}}
+    }
+    else {
+#pragma omp parallel for shared (D1, D2, B, dimX, dimY) private(index, i, j, i2, j2,dv1,dv2)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                index = j*dimX+i;
+                /* symmetric boundary conditions (Neumann): only the lower
+                 * neighbours are read; an axis of length 1 reflects onto
+                 * itself so that no index leaves the arrays */
+                i2 = i - 1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+                j2 = j - 1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+
+                /* divergence components  */
+                dv1 = D1[index] - D1[j2*dimX + i];
+                dv2 = D2[index] - D2[j*dimX + i2];
+
+                B[index] += tau*(2.0f*lambda*(dv1 + dv2) - (B[index] - A[index]));
+            }}
+    }
+    return *B;
+}
diff --git a/src/Core/regularisers_CPU/ROF_TV_core.h b/src/Core/regularisers_CPU/ROF_TV_core.h
new file mode 100644
index 0000000..4e320e9
--- /dev/null
+++ b/src/Core/regularisers_CPU/ROF_TV_core.h
@@ -0,0 +1,57 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
+ *
+ * 
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Number of iterations, for explicit scheme >= 150 is recommended  [REQUIRED]
+ * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
+ *
+ * Output:
+ * [1] Regularized image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+ *
+ * D. Kazantsev, 2016-18
+ */
+ 
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ); /* entry point: runs iterationsNumb explicit iterations and leaves the result in Output */
+/* per-iteration helpers called by TV_ROF_CPU_main (defined in ROF_TV_core.c) */
+CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, long dimX, long dimY, long dimZ); /* in-place update of B from the differences D1, D2, D3 and the data A */
+CCPI_EXPORT float D1_func(float *A, float *D1, long dimX, long dimY, long dimZ); /* writes the normalised forward difference of A along j into D1 */
+CCPI_EXPORT float D2_func(float *A, float *D2, long dimX, long dimY, long dimZ); /* writes the normalised forward difference of A along i into D2 */
+CCPI_EXPORT float D3_func(float *A, float *D3, long dimX, long dimY, long dimZ); /* writes the normalised forward difference of A along k into D3 (called for 3D data only) */
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/src/Core/regularisers_CPU/SB_TV_core.c b/src/Core/regularisers_CPU/SB_TV_core.c
new file mode 100755
index 0000000..769ea67
--- /dev/null
+++ b/src/Core/regularisers_CPU/SB_TV_core.c
@@ -0,0 +1,368 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "SB_TV_core.h"
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter (the mu argument of SB_TV_CPU_main)
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. epsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' (methodTV == 1 selects the anisotropic 'l1' shrinkage, any other value 'iso') [OPTIONAL parameter]
+* 6. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+ 
+/* SB_TV_CPU_main: Split Bregman TV denoising of Input into Output. mu is the regularisation parameter (inverted on entry), iter bounds the outer iterations, epsil != 0 enables early stopping, methodTV == 1 selects the anisotropic shrinkage (isotropic otherwise) and printM == 1 prints the final iteration count. The 2D path is taken when dimZ <= 1. Returns Output[0], or 0 when the work buffers cannot be allocated (Output is then a plain copy of Input). */
+float SB_TV_CPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
+{
+    int ll;
+    long j, DimTotal;
+    float re, re1, lambda;
+    int count = 0;
+    mu = 1.0f/mu;
+    lambda = 2.0f*mu;
+    if (dimZ <= 1) {
+        /* 2D case */
+        float *Output_prev=NULL, *Dx=NULL, *Dy=NULL, *Bx=NULL, *By=NULL;
+        DimTotal = (long)(dimX)*(long)(dimY); /* widen before multiplying so that dimX*dimY cannot overflow int */
+
+        Output_prev = calloc(DimTotal, sizeof(float));
+        Dx = calloc(DimTotal, sizeof(float));
+        Dy = calloc(DimTotal, sizeof(float));
+        Bx = calloc(DimTotal, sizeof(float));
+        By = calloc(DimTotal, sizeof(float));
+        copyIm(Input, Output, (long)(dimX), (long)(dimY), 1l); /*initialize */
+        if (!Output_prev || !Dx || !Dy || !Bx || !By) { free(Output_prev); free(Dx); free(Dy); free(Bx); free(By); return 0.0f; } /* free(NULL) is a no-op */
+
+        /* begin outer SB iterations */
+        for(ll=0; ll<iter; ll++) {
+
+            /* storing old estimate */
+            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
+
+            /* perform two GS iterations (normally 2 is enough for the convergence) */
+            gauss_seidel2D(Output, Input, Output_prev, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY), lambda, mu);
+            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
+            /* second sweep, reading the result of the first one from Output_prev */
+            gauss_seidel2D(Output, Input, Output_prev, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY), lambda, mu);
+
+            /* TV-related step */
+            if (methodTV == 1)  updDxDy_shrinkAniso2D(Output, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY), lambda);
+            else updDxDy_shrinkIso2D(Output, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY), lambda);
+
+            /* update for Bregman variables */
+            updBxBy2D(Output, Dx, Dy, Bx, By, (long)(dimX), (long)(dimY));
+
+            /* early stopping (only when epsil != 0): re is the relative L2 change made by the second sweep */
+            if (epsil != 0) {
+                re = 0.0f; re1 = 0.0f;
+                for(j=0; j<DimTotal; j++) {
+                    re += pow(Output[j] - Output_prev[j],2);
+                    re1 += pow(Output[j],2);
+                }
+                re = sqrt(re)/sqrt(re1);
+                if (re < epsil)  count++;
+                if (count > 4) break; /* stop once re has been below epsil in more than four iterations (not necessarily consecutive) */
+            }
+            /* an all-zero Output gives re = 0/0 = NaN, which never satisfies re < epsil */
+        }
+        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);
+        free(Output_prev); free(Dx); free(Dy); free(Bx); free(By);
+    }
+    else {
+        /* 3D case */
+        float *Output_prev=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL;
+        DimTotal = (long)(dimX)*(long)(dimY)*(long)(dimZ); /* widen before multiplying so that the product cannot overflow int */
+
+        Output_prev = calloc(DimTotal, sizeof(float));
+        Dx = calloc(DimTotal, sizeof(float));
+        Dy = calloc(DimTotal, sizeof(float));
+        Dz = calloc(DimTotal, sizeof(float));
+        Bx = calloc(DimTotal, sizeof(float));
+        By = calloc(DimTotal, sizeof(float));
+        Bz = calloc(DimTotal, sizeof(float));
+        copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ)); /*initialize */
+        if (!Output_prev || !Dx || !Dy || !Dz || !Bx || !By || !Bz) { free(Output_prev); free(Dx); free(Dy); free(Dz); free(Bx); free(By); free(Bz); return 0.0f; } /* free(NULL) is a no-op */
+
+        /* begin outer SB iterations */
+        for(ll=0; ll<iter; ll++) {
+
+            /* storing old estimate */
+            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            /* perform two GS iterations (normally 2 is enough for the convergence) */
+            gauss_seidel3D(Output, Input, Output_prev, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ), lambda, mu);
+            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            /* second sweep, reading the result of the first one from Output_prev */
+            gauss_seidel3D(Output, Input, Output_prev, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ), lambda, mu);
+
+            /* TV-related step */
+            if (methodTV == 1)  updDxDyDz_shrinkAniso3D(Output, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ), lambda);
+            else updDxDyDz_shrinkIso3D(Output, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ), lambda);
+
+            /* update for Bregman variables */
+            updBxByBz3D(Output, Dx, Dy, Dz, Bx, By, Bz, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            /* early stopping (only when epsil != 0): re is the relative L2 change made by the second sweep */
+            if (epsil != 0) {
+                re = 0.0f; re1 = 0.0f;
+                for(j=0; j<DimTotal; j++) {
+                    re += pow(Output[j] - Output_prev[j],2);
+                    re1 += pow(Output[j],2);
+                }
+                re = sqrt(re)/sqrt(re1);
+                if (re < epsil)  count++;
+                if (count > 4) break; /* stop once re has been below epsil in more than four iterations (not necessarily consecutive) */
+            }
+            /* an all-zero Output gives re = 0/0 = NaN, which never satisfies re < epsil */
+        }
+        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);
+        free(Output_prev); free(Dx); free(Dy); free(Dz); free(Bx); free(By); free(Bz);
+    }
+    return *Output;
+}
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+/* gauss_seidel2D: one sweep of the u-update, U = (mu*A + lambda*(four U_prev neighbours + differences of Dx, Dy, Bx, By between each element and its lower neighbours))/(mu + 4*lambda). Neighbours are read from U_prev only, so the result does not depend on the traversal order. Returns U[0]. */
+float gauss_seidel2D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda, float mu)
+{
+    float sum, normConst;
+    long i,j,i1,i2,j1,j2,index;
+    normConst = 1.0f/(mu + 4.0f*lambda);
+
+#pragma omp parallel for shared(U) private(index,i,j,i1,i2,j1,j2,sum)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neumann); an axis of length 1 reflects onto itself so no index leaves the arrays */
+        i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+        i2 = i-1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann) */
+            j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+            j2 = j-1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+            index = j*dimX+i;
+            sum = Dx[j*dimX+i2] - Dx[index] + Dy[j2*dimX+i] - Dy[index] - Bx[j*dimX+i2] + Bx[index] - By[j2*dimX+i] + By[index];
+            sum += U_prev[j*dimX+i1] + U_prev[j*dimX+i2] + U_prev[j1*dimX+i] + U_prev[j2*dimX+i];
+            sum *= lambda;
+            sum += mu*A[index];
+            U[index] = normConst*sum;
+        }}
+    return *U;
+}
+
+/* updDxDy_shrinkAniso2D: anisotropic shrinkage. Dx and Dy become the soft-thresholded values (threshold 1/lambda, applied per component) of the forward differences of U along i and j plus Bx and By. Always returns 1. */
+float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda)
+{
+    long i,j,i1,j1,index;
+    float val1, val11, val2, val22, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,val1,val11,val2,val22)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann); an axis of length 1 reflects onto itself so no index leaves the arrays */
+            i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+            j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+            index = j*dimX+i;
+
+            val1 = (U[j*dimX+i1] - U[index]) + Bx[index];
+            val2 = (U[j1*dimX+i] - U[index]) + By[index];
+
+            val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
+            val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
+
+            /* val/fabs(val) restores the sign; the zero case is handled separately to avoid 0/0 */
+            if (val1 !=0) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0;
+            if (val2 !=0) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0;
+        }}
+    return 1;
+}
+/* updDxDy_shrinkIso2D: isotropic shrinkage. The vector (forward difference of U along i + Bx, forward difference along j + By) is shortened by 1/lambda (never below zero length) and stored in (Dx, Dy). Always returns 1. */
+float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda)
+{
+    long i,j,i1,j1,index;
+    float val1, val11, val2, denom, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,val1,val11,val2,denom)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann); an axis of length 1 reflects onto itself so no index leaves the arrays */
+            i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+            j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+            index = j*dimX+i;
+
+            val1 = (U[j*dimX+i1] - U[index]) + Bx[index];
+            val2 = (U[j1*dimX+i] - U[index]) + By[index];
+
+            denom = sqrt(val1*val1 + val2*val2);
+
+            val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
+
+            if (denom != 0.0f) {
+                Dx[index] = val11*(val1/denom);
+                Dy[index] = val11*(val2/denom);
+            }
+            else {
+                Dx[index] = 0;
+                Dy[index] = 0;
+            }
+        }}
+    return 1;
+}
+/* updBxBy2D: Bregman variable update. Adds to Bx (By) the forward difference of U along i (j) minus Dx (Dy). Always returns 1. */
+float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY)
+{
+    long i,j,i1,j1,index;
+#pragma omp parallel for shared(U) private(index,i,j,i1,j1)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann); an axis of length 1 reflects onto itself so no index leaves the arrays */
+            i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+            j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+            index = j*dimX+i;
+            Bx[index] += (U[j*dimX+i1] - U[index]) - Dx[index];
+            By[index] += (U[j1*dimX+i] - U[index]) - Dy[index];
+        }}
+    return 1;
+}
+
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/*****************************************************************/
+/* gauss_seidel3D: one sweep of the u-update, U = (mu*A + lambda*(six U_prev neighbours + differences of Dx, Dy, Dz, Bx, By, Bz between each element and its lower neighbours))/(mu + 6*lambda). Neighbours are read from U_prev only, so the result does not depend on the traversal order. Returns U[0]. */
+float gauss_seidel3D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda, float mu)
+{
+    float normConst, d_val, b_val, sum;
+    long i,j,i1,i2,j1,j2,k,k1,k2,index;
+    normConst = 1.0f/(mu + 6.0f*lambda);
+#pragma omp parallel for shared(U) private(index,i,j,i1,i2,j1,j2,k,k1,k2,d_val,b_val,sum)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                /* symmetric boundary conditions (Neumann); an axis of length 1 reflects onto itself so no index leaves the arrays */
+                i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+                i2 = i-1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+                j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+                j2 = j-1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+                k1 = k+1; if (k1 == dimZ) k1 = (k > 0) ? k-1 : k;
+                k2 = k-1; if (k2 < 0) k2 = (dimZ > 1) ? k+1 : k;
+                index = (dimX*dimY)*k + j*dimX+i;
+                d_val = Dx[(dimX*dimY)*k + j*dimX+i2] - Dx[index] + Dy[(dimX*dimY)*k + j2*dimX+i] - Dy[index] + Dz[(dimX*dimY)*k2 + j*dimX+i] - Dz[index];
+                b_val = -Bx[(dimX*dimY)*k + j*dimX+i2] + Bx[index] - By[(dimX*dimY)*k + j2*dimX+i] + By[index] - Bz[(dimX*dimY)*k2 + j*dimX+i] + Bz[index];
+                sum = d_val + b_val;
+                sum += U_prev[(dimX*dimY)*k + j*dimX+i1] + U_prev[(dimX*dimY)*k + j*dimX+i2] + U_prev[(dimX*dimY)*k + j1*dimX+i] + U_prev[(dimX*dimY)*k + j2*dimX+i] + U_prev[(dimX*dimY)*k1 + j*dimX+i] + U_prev[(dimX*dimY)*k2 + j*dimX+i];
+                sum *= lambda;
+                sum += mu*A[index];
+                U[index] = normConst*sum;
+            }}}
+    return *U;
+}
+
+/* updDxDyDz_shrinkAniso3D: anisotropic shrinkage. Dx, Dy and Dz become the soft-thresholded values (threshold 1/lambda, applied per component) of the forward differences of U along i, j and k plus Bx, By and Bz. Always returns 1. */
+float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda)
+{
+    long i,j,i1,j1,k,k1,index;
+    float val1, val11, val2, val22, val3, val33, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,k,k1,val1,val11,val2,val22,val3,val33)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX+i;
+                /* symmetric boundary conditions (Neumann); an axis of length 1 reflects onto itself so no index leaves the arrays */
+                i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+                j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+                k1 = k+1; if (k1 == dimZ) k1 = (k > 0) ? k-1 : k;
+
+                val1 = (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) + Bx[index];
+                val2 = (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) + By[index];
+                val3 = (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) + Bz[index];
+
+                val11 = fabs(val1) - denom_lam; if (val11 < 0.0f) val11 = 0.0f;
+                val22 = fabs(val2) - denom_lam; if (val22 < 0.0f) val22 = 0.0f;
+                val33 = fabs(val3) - denom_lam; if (val33 < 0.0f) val33 = 0.0f;
+
+                if (val1 !=0.0f) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0.0f;
+                if (val2 !=0.0f) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0.0f;
+                if (val3 !=0.0f) Dz[index] = (val3/fabs(val3))*val33; else Dz[index] = 0.0f;
+            }}}
+    return 1;
+}
+/* updDxDyDz_shrinkIso3D: isotropic shrinkage. The vector of forward differences of U along i, j, k plus (Bx, By, Bz) is shortened by 1/lambda (never below zero length) and stored in (Dx, Dy, Dz). Always returns 1. */
+float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda)
+{
+    long i,j,i1,j1,k,k1,index;
+    float val1, val11, val2, val3, denom, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,denom,i,j,i1,j1,k,k1,val1,val11,val2,val3)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX+i;
+                /* symmetric boundary conditions (Neumann); an axis of length 1 reflects onto itself so no index leaves the arrays */
+                i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+                j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+                k1 = k+1; if (k1 == dimZ) k1 = (k > 0) ? k-1 : k;
+
+                val1 = (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) + Bx[index];
+                val2 = (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) + By[index];
+                val3 = (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) + Bz[index];
+
+                denom = sqrt(val1*val1 + val2*val2 + val3*val3);
+                val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
+
+                if (denom != 0.0f) {
+                    Dx[index] = val11*(val1/denom);
+                    Dy[index] = val11*(val2/denom);
+                    Dz[index] = val11*(val3/denom);
+                }
+                else {
+                    Dx[index] = 0;
+                    Dy[index] = 0;
+                    Dz[index] = 0;
+                }
+            }}}
+    return 1;
+}
+/* updBxByBz3D: Bregman variable update. Adds to Bx (By, Bz) the forward difference of U along i (j, k) minus Dx (Dy, Dz). Always returns 1. */
+float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ)
+{
+    long i,j,k,i1,j1,k1,index;
+#pragma omp parallel for shared(U) private(index,i,j,k,i1,j1,k1)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX+i;
+                /* symmetric boundary conditions (Neumann); an axis of length 1 reflects onto itself so no index leaves the arrays */
+                i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+                j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+                k1 = k+1; if (k1 == dimZ) k1 = (k > 0) ? k-1 : k;
+                Bx[index] += (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) - Dx[index];
+                By[index] += (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) - Dy[index];
+                Bz[index] += (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) - Dz[index];
+            }}}
+    return 1;
+}
diff --git a/src/Core/regularisers_CPU/SB_TV_core.h b/src/Core/regularisers_CPU/SB_TV_core.h
new file mode 100644
index 0000000..7485e3b
--- /dev/null
+++ b/src/Core/regularisers_CPU/SB_TV_core.h
@@ -0,0 +1,61 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. epsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+* 6. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float SB_TV_CPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
+
+CCPI_EXPORT float gauss_seidel2D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda, float mu);
+CCPI_EXPORT float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda);
+CCPI_EXPORT float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda);
+CCPI_EXPORT float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY);
+
+CCPI_EXPORT float gauss_seidel3D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda, float mu);
+CCPI_EXPORT float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda);
+CCPI_EXPORT float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda);
+CCPI_EXPORT float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/TGV_core.c b/src/Core/regularisers_CPU/TGV_core.c
new file mode 100644
index 0000000..805c3d4
--- /dev/null
+++ b/src/Core/regularisers_CPU/TGV_core.c
@@ -0,0 +1,487 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "TGV_core.h"
+
+/* C-OMP implementation of Primal-Dual denoising method for 
+ * Total Generalized Variation (TGV)-L2 model [1] (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume (2D/3D)
+ * 2. lambda - regularisation parameter
+ * 3. parameter to control the first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations
+ * 6. Lipschitz constant (default is 12)
+ * 
+ * Output:
+ * Filtered/regularised image/volume
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ * 
+ */
+ 
+float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY, int dimZ)
+{
+	long DimTotal;
+	int ll;
+	float *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma;
+	/* Copies U0 into U, then runs `iter` primal-dual iterations on U in place; dimZ == 1 selects the 2D path, anything else the 3D path. */
+	DimTotal = (long)(dimX)*(long)(dimY)*(long)(dimZ); /* widen each factor BEFORE multiplying: the int product overflows above INT_MAX voxels */
+	copyIm(U0, U, (long)(dimX), (long)(dimY), (long)(dimZ)); /* initialize */
+        tau = pow(L2,-0.5);
+        sigma = pow(L2,-0.5);
+        /* tau and sigma (step sizes handed to the update routines) are both 1/sqrt(L2) */
+        /* dual variables */
+        P1 = calloc(DimTotal, sizeof(float));
+        P2 = calloc(DimTotal, sizeof(float));
+        
+        Q1 = calloc(DimTotal, sizeof(float));
+        Q2 = calloc(DimTotal, sizeof(float));
+        Q3 = calloc(DimTotal, sizeof(float));
+        
+        U_old = calloc(DimTotal, sizeof(float));
+        
+        V1 = calloc(DimTotal, sizeof(float));
+        V1_old = calloc(DimTotal, sizeof(float));
+        V2 = calloc(DimTotal, sizeof(float));
+        V2_old = calloc(DimTotal, sizeof(float));
+	/* NOTE(review): none of the calloc results (here or in the 3D branch) are checked for NULL before use */
+	if (dimZ == 1) {
+	/*2D case*/
+	
+        /* Primal-dual iterations begin here */
+        for(ll = 0; ll < iter; ll++) {
+            
+            /* Calculate Dual Variable P */
+            DualP_2D(U, V1, V2, P1, P2, (long)(dimX), (long)(dimY), sigma);
+            
+            /*Projection onto convex set for P*/
+            ProjP_2D(P1, P2, (long)(dimX), (long)(dimY), alpha1);
+            
+            /* Calculate Dual Variable Q */
+            DualQ_2D(V1, V2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), sigma);
+            
+            /*Projection onto convex set for Q*/
+            ProjQ_2D(Q1, Q2, Q3, (long)(dimX), (long)(dimY), alpha0);
+            
+            /*saving U into U_old*/
+            copyIm(U, U_old, (long)(dimX), (long)(dimY), 1l);
+            
+            /*adjoint operation  -> divergence and projection of P*/
+            DivProjP_2D(U, U0, P1, P2, (long)(dimX), (long)(dimY), lambda, tau);
+            
+            /*get updated solution U*/
+            newU(U, U_old, (long)(dimX), (long)(dimY));
+            
+            /*saving V into V_old*/
+            copyIm(V1, V1_old, (long)(dimX), (long)(dimY), 1l);
+            copyIm(V2, V2_old, (long)(dimX), (long)(dimY), 1l);
+            
+            /* upd V*/
+            UpdV_2D(V1, V2, P1, P2, Q1, Q2, Q3, (long)(dimX), (long)(dimY), tau);
+            
+            /*get new V*/
+            newU(V1, V1_old, (long)(dimX), (long)(dimY));
+            newU(V2, V2_old, (long)(dimX), (long)(dimY));
+        } /*end of iterations*/
+        	}
+        else {
+        /*3D case*/
+        float *P3, *Q4, *Q5, *Q6, *V3, *V3_old;
+        /* extra buffers needed only by the 3D path; released at the end of this branch */
+        P3 = calloc(DimTotal, sizeof(float));
+        Q4 = calloc(DimTotal, sizeof(float));
+        Q5 = calloc(DimTotal, sizeof(float));
+        Q6 = calloc(DimTotal, sizeof(float));
+        V3 = calloc(DimTotal, sizeof(float));
+        V3_old = calloc(DimTotal, sizeof(float));
+        
+         /* Primal-dual iterations begin here */
+        for(ll = 0; ll < iter; ll++) {
+            
+            /* Calculate Dual Variable P */
+            DualP_3D(U, V1, V2, V3, P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), sigma);
+            
+            /*Projection onto convex set for P*/
+            ProjP_3D(P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), alpha1);
+            
+            /* Calculate Dual Variable Q */
+            DualQ_3D(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), sigma);
+            
+            /*Projection onto convex set for Q*/
+            ProjQ_3D(Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), alpha0);
+            
+            /*saving U into U_old*/
+            copyIm(U, U_old, (long)(dimX), (long)(dimY), (long)(dimZ));
+            
+            /*adjoint operation  -> divergence and projection of P*/
+            DivProjP_3D(U, U0, P1, P2, P3, (long)(dimX), (long)(dimY), (long)(dimZ), lambda, tau);
+            
+            /*get updated solution U*/
+            newU3D(U, U_old, (long)(dimX), (long)(dimY), (long)(dimZ));
+            
+            /*saving V into V_old*/
+            copyIm_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));
+            
+            /* upd V*/
+            UpdV_3D(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), tau);
+            
+            /*get new V*/
+            newU3D_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ));           
+	        } /*end of iterations*/
+        free(P3);free(Q4);free(Q5);free(Q6);free(V3);free(V3_old);
+        }     
+
+    /*freeing*/
+    free(P1);free(P2);free(Q1);free(Q2);free(Q3);free(U_old);
+    free(V1);free(V2);free(V1_old);free(V2_old);
+	return *U; /* the result lives in U; the return value is just its first element */
+}
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+
+/* Dual step for P, in place: P1 += sigma*(dU/dx - V1), P2 += sigma*(dU/dy - V2), using forward differences of U. Always returns 1. */
+float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, long dimX, long dimY, float sigma)
+{
+    long i,j, index;
+#pragma omp parallel for shared(U,V1,V2,P1,P2) private(i,j,index)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+			 index = j*dimX+i;
+            /* symmetric (Neumann) boundary: at the last index of an axis the previous neighbour replaces the missing next one */
+            if (i == dimX-1) P1[index] += sigma*((U[j*dimX+(i-1)] - U[index]) - V1[index]); 
+            else P1[index] += sigma*((U[j*dimX+(i+1)] - U[index])  - V1[index]); 
+            if (j == dimY-1) P2[index] += sigma*((U[(j-1)*dimX+i] - U[index])  - V2[index]);
+            else  P2[index] += sigma*((U[(j+1)*dimX+i] - U[index])  - V2[index]);
+        }}
+    return 1;
+}
+/* Projection of P, in place: wherever sqrt(P1^2 + P2^2)/alpha1 exceeds 1, P1 and P2 are divided by that ratio. Always returns 1. */
+float ProjP_2D(float *P1, float *P2, long dimX, long dimY, float alpha1)
+{
+    float grad_magn;
+    long i,j,index;
+#pragma omp parallel for shared(P1,P2) private(i,j,index,grad_magn)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+	    index = j*dimX+i;
+            grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2)))/alpha1;
+            if (grad_magn > 1.0f) {
+                P1[index] /= grad_magn;
+                P2[index] /= grad_magn;
+            }
+        }}
+    return 1;
+}
+/* Dual step for Q, in place, from forward differences of V: Q1 += sigma*dV1/dx, Q2 += sigma*dV2/dy, Q3 += sigma*0.5*(dV2/dx + dV1/dy). Always returns 1. */
+float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float sigma)
+{
+    long i,j,index;
+    float q1, q2, q11, q22;
+#pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(i,j,index,q1,q2,q11,q22)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+    	    index = j*dimX+i;
+    	    q1 = 0.0f; q11 = 0.0f; q2 = 0.0f; q22 = 0.0f;
+            /* boundary conditions (Neumann): a difference taken at the last index of an axis stays zero */
+            if (i != dimX-1){
+                q1 = V1[j*dimX+(i+1)] - V1[index];
+                q11 = V2[j*dimX+(i+1)] - V2[index];
+            }
+            if (j != dimY-1) {
+                q2 = V2[(j+1)*dimX+i] - V2[index];
+                q22 = V1[(j+1)*dimX+i] - V1[index];
+            }
+            Q1[index] += sigma*(q1);
+            Q2[index] += sigma*(q2);
+            Q3[index] += sigma*(0.5f*(q11 + q22));
+        }}
+    return 1;
+}
+float ProjQ_2D(float *Q1, float *Q2, float *Q3, long dimX, long dimY, float alpha0)
+{ /* Projection of Q, in place: wherever sqrt(Q1^2 + Q2^2 + 2*Q3^2)/alpha0 exceeds 1, Q1..Q3 are divided by that ratio. Always returns 1. */
+    float grad_magn;
+    long i,j,index;
+#pragma omp parallel for shared(Q1,Q2,Q3) private(i,j,index,grad_magn)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+	   index = j*dimX+i;
+            grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2));
+            grad_magn = grad_magn/alpha0;
+            if (grad_magn > 1.0f) {
+                Q1[index] /= grad_magn;
+                Q2[index] /= grad_magn;
+                Q3[index] /= grad_magn;
+            }
+        }}
+    return 1;
+}
+/* Update of U, in place: div = backward-difference divergence of (P1,P2) (at index 0 of an axis the P value itself is used); U = (lambda*(U + tau*div) + tau*U0)/(lambda + tau). Returns U[0]. */
+float DivProjP_2D(float *U, float *U0, float *P1, float *P2, long dimX, long dimY, float lambda, float tau)
+{
+    long i,j,index;
+    float P_v1, P_v2, div;
+#pragma omp parallel for shared(U,U0,P1,P2) private(i,j,index,P_v1,P_v2,div)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+	    index = j*dimX+i;
+            if (i == 0) P_v1 = P1[index];
+            else P_v1 = P1[index] - P1[j*dimX+(i-1)];
+            if (j == 0) P_v2 = P2[index];
+            else  P_v2 = P2[index] - P2[(j-1)*dimX+i];
+            div = P_v1 + P_v2;
+            U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
+        }}
+    return *U;
+}
+/* Replaces every element of U with 2*U - U_old over dimX*dimY elements (in place); returns U[0]. */
+float newU(float *U, float *U_old, long dimX, long dimY)
+{
+    long i;
+#pragma omp parallel for shared(U,U_old) private(i)
+    for(i=0; i<dimX*dimY; i++) U[i] = 2*U[i] - U_old[i];
+    return *U;
+}
+/* Update of V, in place: V1 += tau*(P1 + div1), V2 += tau*(P2 + div2), with div1 = dQ1/dx + dQ3/dy and div2 = dQ3/dx + dQ2/dy taken as backward differences. Always returns 1. */
+float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float tau)
+{
+    long i, j, index;
+    float q1, q3_x, q3_y, q2, div1, div2;
+#pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(i, j, index, q1, q3_x, q3_y, q2, div1, div2)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+	    index = j*dimX+i;
+              q2 = 0.0f;  q3_y = 0.0f; q1 = 0.0f; q3_x = 0.0;
+            /* boundary conditions (Neumann): a backward difference taken at index 0 of an axis stays zero */
+            if (i != 0) {
+                q1 = Q1[index] - Q1[j*dimX+(i-1)];
+                q3_x = Q3[index] - Q3[j*dimX+(i-1)];
+            }
+            if (j != 0) {
+                q2 = Q2[index] - Q2[(j-1)*dimX+i];
+                q3_y = Q3[index] - Q3[(j-1)*dimX+i];
+            }
+            div1 = q1 + q3_y;
+            div2 = q3_x + q2;
+            V1[index] += tau*(P1[index] + div1);
+            V2[index] += tau*(P2[index] + div2);
+        }}
+    return 1;
+}
+
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/* Dual step for P in 3D, in place: Pn += sigma*(forward difference of U along axis n - Vn) for n = 1..3. Always returns 1. */
+float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float sigma)
+{
+    long i,j,k, index;
+#pragma omp parallel for shared(U,V1,V2,V3,P1,P2,P3) private(i,j,k,index)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+          for(k=0; k<dimZ; k++) {             	   
+    	   index = (dimX*dimY)*k + j*dimX+i;    	   
+            /* symmetric (Neumann) boundary: at the last index of an axis the previous neighbour replaces the missing next one */
+            if (i == dimX-1) P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i-1)] - U[index]) - V1[index]); 
+            else P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i+1)] - U[index])  - V1[index]); 
+            if (j == dimY-1) P2[index] += sigma*((U[(dimX*dimY)*k + (j-1)*dimX+i] - U[index])  - V2[index]);
+            else  P2[index] += sigma*((U[(dimX*dimY)*k + (j+1)*dimX+i] - U[index])  - V2[index]);
+            if (k == dimZ-1) P3[index] += sigma*((U[(dimX*dimY)*(k-1) + j*dimX+i] - U[index])  - V3[index]);
+            else  P3[index] += sigma*((U[(dimX*dimY)*(k+1) + j*dimX+i] - U[index])  - V3[index]);
+        }}}
+    return 1;
+}
+/* Projection of P in 3D, in place: wherever sqrt(P1^2 + P2^2 + P3^2)/alpha1 exceeds 1, P1..P3 are divided by that ratio. Always returns 1. */
+float ProjP_3D(float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float alpha1)
+{
+    float grad_magn;
+    long i,j,k,index;
+#pragma omp parallel for shared(P1,P2,P3) private(i,j,k,index,grad_magn)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+	  for(k=0; k<dimZ; k++) {   	
+   	    index = (dimX*dimY)*k + j*dimX+i;
+            grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1;
+            if (grad_magn > 1.0f) {
+                P1[index] /= grad_magn;
+                P2[index] /= grad_magn;
+                P3[index] /= grad_magn;
+            }
+        }}}
+    return 1;
+}
+/* Dual step for Q in 3D, in place, from forward differences of V: Q1..Q3 take the same-axis differences of V1..V3, Q4..Q6 take half the sum of the two matching cross-axis differences. Always returns 1. */
+float DualQ_3D(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float sigma)
+{
+    long i,j,k,index;
+    float q1, q2, q3, q11, q22, q33, q44, q55, q66;
+#pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6,V1,V2,V3) private(i,j,k,index,q1,q2,q3,q11,q22,q33,q44,q55,q66)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+       	  for(k=0; k<dimZ; k++) {   	
+	    index = (dimX*dimY)*k + j*dimX+i;
+	    q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f;
+            /* boundary conditions (Neumann): a difference taken at the last index of an axis stays zero */
+            if (i != dimX-1){ 
+                q1 = V1[(dimX*dimY)*k + j*dimX+(i+1)] - V1[index];              
+                q11 = V2[(dimX*dimY)*k + j*dimX+(i+1)] - V2[index];
+                q33 = V3[(dimX*dimY)*k + j*dimX+(i+1)] - V3[index];
+            }
+            if (j != dimY-1) {
+                q2 = V2[(dimX*dimY)*k + (j+1)*dimX+i] - V2[index];                
+                q22 = V1[(dimX*dimY)*k + (j+1)*dimX+i] - V1[index];
+                q55 = V3[(dimX*dimY)*k + (j+1)*dimX+i] - V3[index];
+            }
+            if (k != dimZ-1) {
+                q3 = V3[(dimX*dimY)*(k+1) + j*dimX+i] - V3[index];
+                q44 = V1[(dimX*dimY)*(k+1) + j*dimX+i] - V1[index];
+                q66 = V2[(dimX*dimY)*(k+1) + j*dimX+i] - V2[index];
+            }
+            /* q11/q22 = dV2/dx, dV1/dy; q33/q44 = dV3/dx, dV1/dz; q55/q66 = dV3/dy, dV2/dz */
+            Q1[index] += sigma*(q1); /*Q11*/
+            Q2[index] += sigma*(q2); /*Q22*/            
+            Q3[index] += sigma*(q3); /*Q33*/
+            Q4[index] += sigma*(0.5f*(q11 + q22)); /* Q21 / Q12 */
+            Q5[index] += sigma*(0.5f*(q33 + q44)); /* Q31 / Q13 */
+            Q6[index] += sigma*(0.5f*(q55 + q66)); /* Q32 / Q23 */
+        }}}
+    return 1;
+}
+float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float alpha0)
+{ /* Projection of Q in 3D, in place: wherever sqrt(Q1^2 + Q2^2 + Q3^2 + 2*(Q4^2 + Q5^2 + Q6^2))/alpha0 exceeds 1, Q1..Q6 are divided by that ratio. Always returns 1. */
+    float grad_magn;
+    long i,j,k,index;
+#pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6) private(i,j,k,index,grad_magn)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+       	  for(k=0; k<dimZ; k++) {   	
+	    index = (dimX*dimY)*k + j*dimX+i;           
+            grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2));
+            grad_magn = grad_magn/alpha0;
+            if (grad_magn > 1.0f) {
+                Q1[index] /= grad_magn;
+                Q2[index] /= grad_magn;
+                Q3[index] /= grad_magn;
+                Q4[index] /= grad_magn;
+                Q5[index] /= grad_magn;
+                Q6[index] /= grad_magn;
+            }
+        }}}
+    return 1;
+}
+/* Update of U in 3D, in place: div = backward-difference divergence of (P1,P2,P3); U = (lambda*(U + tau*div) + tau*U0)/(lambda + tau). Returns U[0]. */
+float DivProjP_3D(float *U, float *U0, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float lambda, float tau)
+{
+    long i,j,k,index;
+    float P_v1, P_v2, P_v3, div;
+#pragma omp parallel for shared(U,U0,P1,P2,P3) private(i,j,k,index,P_v1,P_v2,P_v3,div)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+       	  for(k=0; k<dimZ; k++) {   	
+	    index = (dimX*dimY)*k + j*dimX+i; 	    
+            if (i == 0) P_v1 = P1[index];
+            else P_v1 = P1[index] - P1[(dimX*dimY)*k + j*dimX+(i-1)];
+            if (j == 0) P_v2 = P2[index];
+            else P_v2 = P2[index] - P2[(dimX*dimY)*k + (j-1)*dimX+i];
+            if (k == 0) P_v3 = P3[index];
+            else P_v3 = P3[index] - P3[(dimX*dimY)*(k-1) + (j)*dimX+i];              
+            /* at index 0 of an axis the P value itself is used in place of a backward difference */
+            div = P_v1 + P_v2 + P_v3;
+            U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau); 
+        }}}
+    return *U;
+}
+/* Update of V in 3D, in place: Vn += tau*(Pn + divn), where each divn sums three backward differences of the Q arrays (see div1..div3 below). Always returns 1. */
+float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float tau)
+{
+    long i,j,k,index;
+    float q1, q4x, q5x, q2, q4y, q6y, q6z, q5z, q3, div1, div2, div3;
+#pragma omp parallel for shared(V1,V2,V3,P1,P2,P3,Q1,Q2,Q3,Q4,Q5,Q6) private(i,j,k,index,q1,q4x,q5x,q2,q4y,q6y,q6z,q5z,q3,div1,div2,div3)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+       	  for(k=0; k<dimZ; k++) {   	
+	    index = (dimX*dimY)*k + j*dimX+i; 	
+	    q1 = 0.0f; q4x= 0.0f; q5x= 0.0f; q2= 0.0f; q4y= 0.0f; q6y= 0.0f; q6z= 0.0f; q5z= 0.0f; q3= 0.0f;
+            /* array naming: Q1 = Q11, Q2 = Q22, Q3 = Q33, Q4 = Q21/Q12, Q5 = Q31/Q13, Q6 = Q32/Q23 */
+            /* boundary conditions (Neumann): a backward difference taken at index 0 of an axis stays zero */
+            if (i != 0) {
+                q1 = Q1[index] - Q1[(dimX*dimY)*k + j*dimX+(i-1)];
+                q4x = Q4[index] - Q4[(dimX*dimY)*k + j*dimX+(i-1)];                
+                q5x = Q5[index] - Q5[(dimX*dimY)*k + j*dimX+(i-1)];
+            }
+            if (j != 0) {
+                q2 = Q2[index] - Q2[(dimX*dimY)*k + (j-1)*dimX+i];
+                q4y = Q4[index] - Q4[(dimX*dimY)*k + (j-1)*dimX+i];
+                q6y = Q6[index] - Q6[(dimX*dimY)*k + (j-1)*dimX+i];
+            }
+             if (k != 0) {
+                q6z = Q6[index] - Q6[(dimX*dimY)*(k-1) + (j)*dimX+i];
+                q5z = Q5[index] - Q5[(dimX*dimY)*(k-1) + (j)*dimX+i];
+                q3 = Q3[index] - Q3[(dimX*dimY)*(k-1) + (j)*dimX+i];
+            }
+            div1 = q1 + q4y + q5z;
+            div2 = q4x + q2 + q6z;            
+            div3 = q5x + q6y + q3;
+            
+            V1[index] += tau*(P1[index] + div1);
+            V2[index] += tau*(P2[index] + div2);
+            V3[index] += tau*(P3[index] + div3);
+        }}}
+    return 1;
+}
+/* Copies V1, V2, V3 element-wise into V1_old, V2_old, V3_old over dimX*dimY*dimZ elements; always returns 1. */
+float copyIm_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ)
+{
+	long j;
+#pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(j)
+	for (j = 0; j<dimX*dimY*dimZ; j++)  {	
+	V1_old[j] = V1[j];
+	V2_old[j] = V2[j];
+	V3_old[j] = V3[j];	
+	}
+	return 1;
+}
+
+/* Replaces every element of U with 2*U - U_old over dimX*dimY*dimZ elements (in place); returns U[0]. */
+float newU3D(float *U, float *U_old, long dimX, long dimY, long dimZ)
+{
+    long i;
+#pragma omp parallel for shared(U, U_old) private(i)
+    for(i=0; i<dimX*dimY*dimZ; i++) U[i] = 2.0f*U[i] - U_old[i];
+    return *U;
+}
+
+
+/* Replaces V1, V2, V3 with 2*Vn - Vn_old over dimX*dimY*dimZ elements (in place); always returns 1. */
+float newU3D_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ)
+{
+    long i;
+#pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(i)
+    for(i=0; i<dimX*dimY*dimZ; i++) {
+    V1[i] = 2.0f*V1[i] - V1_old[i];
+    V2[i] = 2.0f*V2[i] - V2_old[i];
+    V3[i] = 2.0f*V3[i] - V3_old[i];
+    }
+    return 1;
+}
+
diff --git a/src/Core/regularisers_CPU/TGV_core.h b/src/Core/regularisers_CPU/TGV_core.h
new file mode 100644
index 0000000..11b12c1
--- /dev/null
+++ b/src/Core/regularisers_CPU/TGV_core.h
@@ -0,0 +1,73 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* C-OMP implementation of Primal-Dual denoising method for 
+ * Total Generalized Variation (TGV)-L2 model [1] (2D/3D)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume (2D/3D)
+ * 2. lambda - regularisation parameter
+ * 3. parameter to control the first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations
+ * 6. Lipschitz constant (default is 12)
+ * 
+ * Output:
+ * Filtered/regularised image/volume
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ */
+ 
+ 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+CCPI_EXPORT float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY, int dimZ);
+
+/* 2D functions */
+CCPI_EXPORT float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, long dimX, long dimY, float sigma);
+CCPI_EXPORT float ProjP_2D(float *P1, float *P2, long dimX, long dimY, float alpha1);
+CCPI_EXPORT float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float sigma);
+CCPI_EXPORT float ProjQ_2D(float *Q1, float *Q2, float *Q3, long dimX, long dimY, float alpha0);
+CCPI_EXPORT float DivProjP_2D(float *U, float *U0, float *P1, float *P2, long dimX, long dimY, float lambda, float tau);
+CCPI_EXPORT float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float tau);
+CCPI_EXPORT float newU(float *U, float *U_old, long dimX, long dimY);
+/* 3D functions */
+CCPI_EXPORT float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float sigma);
+CCPI_EXPORT float ProjP_3D(float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float alpha1);
+CCPI_EXPORT float DualQ_3D(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float sigma);
+CCPI_EXPORT float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float alpha0);
+CCPI_EXPORT float DivProjP_3D(float *U, float *U0, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float lambda, float tau);
+CCPI_EXPORT float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float tau);
+CCPI_EXPORT float newU3D(float *U, float *U_old, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float copyIm_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float newU3D_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/TNV_core.c b/src/Core/regularisers_CPU/TNV_core.c
new file mode 100755
index 0000000..753cc5f
--- /dev/null
+++ b/src/Core/regularisers_CPU/TNV_core.c
@@ -0,0 +1,452 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TNV_core.h"
+
+/*
+ * C-OMP implementation of Total Nuclear Variation regularisation model (2D + channels) [1]
+ * The code is modified from the implementation by Joan Duran <joan.duran@uib.es> see
+ * "denoisingPDHG_ipol.cpp" in Joan's Collaborative Total Variation package
+ *
+ * Input Parameters:
+ * 1. Noisy volume of 2D + channel dimension, i.e. 3D volume
+ * 2. lambda - regularisation parameter
+ * 3. Number of iterations [OPTIONAL parameter]
+ * 4. epsilon - tolerance constant [OPTIONAL parameter]
+ * 5. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
+ *
+ * Output:
+ * 1. Filtered/regularized image
+ *
+ * [1]. Duran, J., Moeller, M., Sbert, C. and Cremers, D., 2016. Collaborative total variation: a general framework for vectorial TV models. SIAM Journal on Imaging Sciences, 9(1), pp.116-151.
+ */
+
+float TNV_CPU_main(float *Input, float *u, float lambda, int maxIter, float tol, int dimX, int dimY, int dimZ)
+{
+    long k, p, q, r, DimTotal;
+    float taulambda;
+    float *u_upd, *gx, *gy, *gx_upd, *gy_upd, *qx, *qy, *qx_upd, *qy_upd, *v, *vx, *vy, *gradx, *grady, *gradx_upd, *grady_upd, *gradx_ubar, *grady_ubar, *div, *div_upd;
+    /* Adaptive PDHG loop: u holds the starting estimate on entry and the result on exit; returns u[0] (0 if an allocation fails) */
+    p = 1l;   /* makes proxF take its p == 1 branch */
+    q = 1l;   /* q and r are only forwarded to proxF */
+    r = 0l;
+
+    lambda = 1.0f/(2.0f*lambda);
+    DimTotal = (long)dimX*(long)dimY*(long)dimZ;   /* widen before multiplying: the int product can overflow */
+    /* PDHG algorithm parameters*/
+    float tau = 0.5f;
+    float sigma = 0.5f;
+    float theta = 1.0f;
+
+    // Auxiliary work buffers, zero-initialised by calloc
+    u_upd = calloc(DimTotal, sizeof(float));
+    gx = calloc(DimTotal, sizeof(float));
+    gy = calloc(DimTotal, sizeof(float));
+    gx_upd = calloc(DimTotal, sizeof(float));
+    gy_upd = calloc(DimTotal, sizeof(float));
+    qx = calloc(DimTotal, sizeof(float));
+    qy = calloc(DimTotal, sizeof(float));
+    qx_upd = calloc(DimTotal, sizeof(float));
+    qy_upd = calloc(DimTotal, sizeof(float));
+    v = calloc(DimTotal, sizeof(float));
+    vx = calloc(DimTotal, sizeof(float));
+    vy = calloc(DimTotal, sizeof(float));
+    gradx = calloc(DimTotal, sizeof(float));
+    grady = calloc(DimTotal, sizeof(float));
+    gradx_upd = calloc(DimTotal, sizeof(float));
+    grady_upd = calloc(DimTotal, sizeof(float));
+    gradx_ubar = calloc(DimTotal, sizeof(float));
+    grady_ubar = calloc(DimTotal, sizeof(float));
+    div = calloc(DimTotal, sizeof(float));
+    div_upd = calloc(DimTotal, sizeof(float));
+
+    // Backtracking parameters
+    float s = 1.0f;
+    float gamma = 0.75f;
+    float beta = 0.95f;
+    float alpha0 = 0.2f;
+    float alpha = alpha0;
+    float delta = 1.5f;
+    float eta = 0.95f;
+
+    // PDHG algorithm parameters
+    taulambda = tau * lambda;
+    float divtau = 1.0f / tau;
+    float divsigma = 1.0f / sigma;
+    float theta1 = 1.0f + theta;
+
+    /* Give up before touching any buffer if an allocation failed (free(NULL) is a no-op) */
+    if (!u_upd || !gx || !gy || !gx_upd || !gy_upd || !qx || !qy || !qx_upd || !qy_upd || !v || !vx || !vy || !gradx || !grady || !gradx_upd || !grady_upd || !gradx_ubar || !grady_ubar || !div || !div_upd) {
+        free(u_upd); free(gx); free(gy); free(gx_upd); free(gy_upd); free(qx); free(qy); free(qx_upd); free(qy_upd); free(v); free(vx); free(vy); free(gradx); free(grady); free(gradx_upd); free(grady_upd); free(gradx_ubar); free(grady_ubar); free(div); free(div_upd);
+        printf("TNV_CPU_main: memory allocation failed \n"); return 0.0f; }
+    // Apply Primal-Dual Hybrid Gradient scheme
+    int iter = 0;
+    float residual = fLarge;
+    float ubarx, ubary;
+
+    for(iter = 0; iter < maxIter; iter++)   {
+        // Argument of proximal mapping of fidelity term
+#pragma omp parallel for shared(v, u) private(k)
+        for(k=0; k<DimTotal; k++)  {v[k] = u[k] + tau*div[k];}
+
+        // Proximal solution of fidelity term
+        proxG(u_upd, v, Input, taulambda, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+        // Gradient of updated primal variable
+        gradient(u_upd, gradx_upd, grady_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+        // Argument of proximal mapping of regularization term
+#pragma omp parallel for shared(gradx_upd, grady_upd, gradx, grady) private(k, ubarx, ubary)
+        for(k=0; k<DimTotal; k++) {
+            ubarx = theta1 * gradx_upd[k] - theta * gradx[k];
+            ubary = theta1 * grady_upd[k] - theta * grady[k];
+            vx[k] = ubarx + divsigma * qx[k];
+            vy[k] = ubary + divsigma * qy[k];
+            gradx_ubar[k] = ubarx;
+            grady_ubar[k] = ubary;
+        }
+
+        proxF(gx_upd, gy_upd, vx, vy, sigma, p, q, r, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+        // Update dual variable
+#pragma omp parallel for shared(qx_upd, qy_upd) private(k)
+        for(k=0; k<DimTotal; k++) {
+            qx_upd[k] = qx[k] + sigma * (gradx_ubar[k] - gx_upd[k]);
+            qy_upd[k] = qy[k] + sigma * (grady_ubar[k] - gy_upd[k]);
+        }
+
+        // Divergence of updated dual variable (divergence() accumulates, so clear the target first)
+#pragma omp parallel for shared(div_upd) private(k)
+        for(k=0; k<DimTotal; k++)  {div_upd[k] = 0.0f;}
+        divergence(qx_upd, qy_upd, div_upd, dimX, dimY, dimZ);
+
+        // Compute primal residual, dual residual, and backtracking condition
+        float resprimal = 0.0f;
+        float resdual = 0.0f;
+        float product = 0.0f;
+        float unorm = 0.0f;
+        float qnorm = 0.0f;
+
+        for(k=0; k<DimTotal; k++) {
+            float udiff = u[k] - u_upd[k];
+            float qxdiff = qx[k] - qx_upd[k];
+            float qydiff = qy[k] - qy_upd[k];
+            float divdiff = div[k] - div_upd[k];
+            float gradxdiff = gradx[k] - gradx_upd[k];
+            float gradydiff = grady[k] - grady_upd[k];
+
+            resprimal += fabs(divtau*udiff + divdiff);
+            resdual += fabs(divsigma*qxdiff - gradxdiff);
+            resdual += fabs(divsigma*qydiff - gradydiff);
+
+            unorm += (udiff * udiff);
+            qnorm += (qxdiff * qxdiff + qydiff * qydiff);
+            product += (gradxdiff * qxdiff + gradydiff * qydiff);
+        }
+
+        float b = (2.0f * tau * sigma * product) / (gamma * sigma * unorm +
+                gamma * tau * qnorm);
+
+        // Adapt step-size parameters
+        float dual_dot_delta = resdual * s * delta;
+        float dual_div_delta = (resdual * s) / delta;
+
+        if(b > 1)
+        {
+            // Decrease step-sizes to fit balancing principle
+            tau = (beta * tau) / b;
+            sigma = (beta * sigma) / b;
+            alpha = alpha0;
+            // Reject the step: copyIm(A, U) copies A into U, so the *_upd candidates are overwritten by the previous iterates
+            copyIm(u, u_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(gx, gx_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(gy, gy_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(qx, qx_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(qy, qy_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(gradx, gradx_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(grady, grady_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(div, div_upd, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+        } else if(resprimal > dual_dot_delta)
+        {
+            // Increase primal step-size and decrease dual step-size
+            tau = tau / (1.0f - alpha);
+            sigma = sigma * (1.0f - alpha);
+            alpha = alpha * eta;
+
+        } else if(resprimal < dual_div_delta)
+        {
+            // Decrease primal step-size and increase dual step-size
+            tau = tau * (1.0f - alpha);
+            sigma = sigma / (1.0f - alpha);
+            alpha = alpha * eta;
+        }
+
+        // Update variables
+        taulambda = tau * lambda;
+
+        // Reciprocals of the (possibly adapted) step sizes
+        divsigma = 1.0f / sigma;
+        divtau = 1.0f / tau;
+        // Commit: copy the *_upd candidates into the current iterates
+        copyIm(u_upd, u, (long)(dimX), (long)(dimY), (long)(dimZ));
+        copyIm(gx_upd, gx, (long)(dimX), (long)(dimY), (long)(dimZ));
+        copyIm(gy_upd, gy, (long)(dimX), (long)(dimY), (long)(dimZ));
+        copyIm(qx_upd, qx, (long)(dimX), (long)(dimY), (long)(dimZ));
+        copyIm(qy_upd, qy, (long)(dimX), (long)(dimY), (long)(dimZ));
+        copyIm(gradx_upd, gradx, (long)(dimX), (long)(dimY), (long)(dimZ));
+        copyIm(grady_upd, grady, (long)(dimX), (long)(dimY), (long)(dimZ));
+        copyIm(div_upd, div, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+        // Compute residual at current iteration (sum of both residuals divided by the element count)
+        residual = (resprimal + resdual) / ((float) DimTotal);
+
+        // Stop early once the residual drops below the tolerance
+        if (residual < tol) {
+            /* the summary printed after the loop reports this iteration, so nothing is printed here */
+            break; }
+
+    }
+    printf("Iterations stopped at %i with the residual %f \n", iter, residual);
+    free (u_upd); free(gx); free(gy); free(gx_upd); free(gy_upd);
+    free(qx); free(qy); free(qx_upd); free(qy_upd); free(v); free(vx); free(vy);
+    free(gradx); free(grady); free(gradx_upd); free(grady_upd); free(gradx_ubar);
+    free(grady_ubar); free(div); free(div_upd);    
+    return *u;
+}
+
+float proxG(float *u_upd, float *v, float *f, float taulambda, long dimX, long dimY, long dimZ)
+{
+    /* Element-wise update over all dimX*dimY*dimZ entries:
+     * u_upd = (v + taulambda*f) / (1 + taulambda). Returns the first entry of u_upd. */
+    const long total = dimZ*dimX*dimY;
+    const float denom = 1.0f + taulambda;
+    long idx;
+#pragma omp parallel for shared(v, f, u_upd) private(idx)
+    for(idx=0; idx<total; idx++)
+        u_upd[idx] = (v[idx] + taulambda * f[idx])/denom;
+    return *u_upd;
+}
+
+float gradient(float *u_upd, float *gradx_upd, float *grady_upd, long dimX, long dimY, long dimZ)
+{
+    /* Forward-difference gradient, taken independently inside each of the dimZ slices of
+     * dimX*dimY elements. gradx_upd is zero in the last column and grady_upd in the last
+     * row, where no forward neighbour exists. Always returns 1. */
+    long col, row, ch, pos;
+    const long slice = dimX*dimY;
+#pragma omp parallel for shared(gradx_upd,grady_upd,u_upd) private(col, row, ch, pos)
+    for(ch = 0; ch < dimZ; ch++)   {
+        for(row = 0; row < dimY; row++)   {
+            for(col = 0; col < dimX; col++)   {
+                pos = slice*ch + row*dimX + col;
+
+                /* x-direction: neighbour one element further along the row */
+                gradx_upd[pos] = (col != dimX-1) ? u_upd[pos+1] - u_upd[pos] : 0.0f;
+
+                /* y-direction: neighbour one full row (dimX elements) further */
+                grady_upd[pos] = (row != dimY-1) ? u_upd[pos+dimX] - u_upd[pos] : 0.0f;
+            }
+        }
+    }
+
+    return 1;
+}
+
+float proxF(float *gx, float *gy, float *vx, float *vy, float sigma, int p, int q, int r, long dimX, long dimY, long dimZ)
+{
+    /* Proximal step for the regularisation term, solved independently at every pixel (i, j):
+     *  1. accumulate M = [M1 M2; M2 M3] from the products of vx and vy over the dimZ channels;
+     *  2. compute the eigenvalues and normalised eigenvectors of M; sig1 and sig2 are the
+     *     square roots of the eigenvalues;
+     *  3. shrink sig1 and sig2: soft-threshold by 1/sigma when p == 1, or subtract the values
+     *     produced by the loop marked "l1 projection part" when p == INFNORM
+     *     (for any other p both shrunk values stay 0, so gx and gy become 0);
+     *  4. write gx and gy as the combination of vx and vy given by t1, t2 and t3.
+     * q and r are accepted but not used. Always returns 1.
+     */
+    float divsigma = 1.0f / sigma;
+
+    // (S^p, \ell^1) norm decouples at each pixel
+
+    // Scratch variables; each OpenMP thread gets its own copy through the private clause
+    float proj[2], sum, shrinkfactor;
+    float M1,M2,M3,valuex,valuey,T,D,det,eig1,eig2,sig1,sig2,V1, V2, V3, V4, v0,v1,v2, mu1,mu2,sig1_upd,sig2_upd,t1,t2,t3;
+    long i,j,k, ii, num;
+#pragma omp parallel for shared (gx,gy,vx,vy,p) private(i,ii,j,k,proj,num, sum, shrinkfactor, M1,M2,M3,valuex,valuey,T,D,det,eig1,eig2,sig1,sig2,V1, V2, V3, V4,v0,v1,v2,mu1,mu2,sig1_upd,sig2_upd,t1,t2,t3)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+
+            proj[0] = proj[1] = 0.0f;   /* fixed-size scratch instead of an unchecked calloc/free pair per pixel */
+            // Compute matrix $M\in\R^{2\times 2}$
+            M1 = 0.0f;
+            M2 = 0.0f;
+            M3 = 0.0f;
+
+            for(k = 0; k < dimZ; k++)
+            {
+                valuex = vx[(dimX*dimY)*k + (j)*dimX + (i)];
+                valuey = vy[(dimX*dimY)*k + (j)*dimX + (i)];
+
+                M1 += (valuex * valuex);
+                M2 += (valuex * valuey);
+                M3 += (valuey * valuey);
+            }
+
+            // Compute eigenvalues of M (clamped at 0 so the square roots stay real)
+            T = M1 + M3;
+            D = M1 * M3 - M2 * M2;
+            det = sqrt(MAX((T * T / 4.0f) - D, 0.0f));
+            eig1 = MAX((T / 2.0f) + det, 0.0f);
+            eig2 = MAX((T / 2.0f) - det, 0.0f);
+            sig1 = sqrt(eig1);
+            sig2 = sqrt(eig2);
+
+            // Compute normalized eigenvectors: (V1, V3) belongs to eig1 and (V2, V4) to eig2
+            V1 = V2 = V3 = V4 = 0.0f;
+
+            if(M2 != 0.0f)
+            {
+                v0 = M2;
+                v1 = eig1 - M3;
+                v2 = eig2 - M3;
+
+                mu1 = sqrtf(v0 * v0 + v1 * v1);
+                mu2 = sqrtf(v0 * v0 + v2 * v2);
+
+                if(mu1 > fTiny)
+                {
+                    V1 = v1 / mu1;
+                    V3 = v0 / mu1;
+                }
+
+                if(mu2 > fTiny)
+                {
+                    V2 = v2 / mu2;
+                    V4 = v0 / mu2;
+                }
+
+            } else
+            {
+                if(M1 > M3)
+                {
+                    V1 = V4 = 1.0f;
+                    V2 = V3 = 0.0f;
+
+                } else
+                {
+                    V1 = V4 = 0.0f;
+                    V2 = V3 = 1.0f;
+                }
+            }
+
+            // Compute prox_p of the diagonal entries
+            sig1_upd = sig2_upd = 0.0f;
+
+            if(p == 1)
+            {
+                sig1_upd = MAX(sig1 - divsigma, 0.0f);
+                sig2_upd = MAX(sig2 - divsigma, 0.0f);
+
+            } else if(p == INFNORM)
+            {
+                proj[0] = sigma * fabs(sig1);
+                proj[1] = sigma * fabs(sig2);
+
+                /*l1 projection part */
+                sum = fLarge;
+                num = 0l;
+                shrinkfactor = 0.0f;
+                while(sum > 1.0f)
+                {
+                    sum = 0.0f;
+                    num = 0;
+
+                    for(ii = 0; ii < 2; ii++)
+                    {
+                        proj[ii] = MAX(proj[ii] - shrinkfactor, 0.0f);
+
+                        sum += fabs(proj[ii]);
+                        if(proj[ii]!= 0.0f)
+                            num++;
+                    }
+
+                    if(num > 0)
+                        shrinkfactor = (sum - 1.0f) / num;
+                    else
+                        break;
+                }
+                /*l1 proj ends*/
+
+                sig1_upd = sig1 - divsigma * proj[0];
+                sig2_upd = sig2 - divsigma * proj[1];
+            }
+
+            // Compute the diagonal entries of $\widehat{\Sigma}\Sigma^{\dagger}_0$
+            if(sig1 > fTiny)
+                sig1_upd /= sig1;
+
+            if(sig2 > fTiny)
+                sig2_upd /= sig2;
+
+            // Compute solution
+            t1 = sig1_upd * V1 * V1 + sig2_upd * V2 * V2;
+            t2 = sig1_upd * V1 * V3 + sig2_upd * V2 * V4;
+            t3 = sig1_upd * V3 * V3 + sig2_upd * V4 * V4;
+
+            for(k = 0; k < dimZ; k++)
+            {
+                gx[(dimX*dimY)*k + j*dimX + i] = vx[(dimX*dimY)*k + j*dimX + i] * t1 + vy[(dimX*dimY)*k + j*dimX + i] * t2;
+                gy[(dimX*dimY)*k + j*dimX + i] = vx[(dimX*dimY)*k + j*dimX + i] * t2 + vy[(dimX*dimY)*k + j*dimX + i] * t3;
+            }           
+
+            // proj is a stack array, so there is nothing to release per pixel
+        }
+    }
+
+    return 1;
+}
+
+float divergence(float *qx_upd, float *qy_upd, float *div_upd, long dimX, long dimY, long dimZ)
+{
+    /* Accumulates into div_upd (existing contents are added to, not overwritten) the
+     * contributions of qx_upd and qy_upd, slice by slice: every value is added at its own
+     * position and subtracted at the forward neighbour. Returns the first entry of div_upd. */
+    long col, row, ch, pos;
+    const long slice = dimX*dimY;
+#pragma omp parallel for shared(qx_upd,qy_upd,div_upd) private(col, row, ch, pos)
+    for(ch = 0; ch < dimZ; ch++)   {
+        for(row = 0; row < dimY; row++)   {
+            for(col = 0; col < dimX; col++)   {
+                pos = slice*ch + row*dimX + col;
+                if(col != dimX-1) {
+                    /* x-direction: forward neighbour is the next element of the row */
+                    div_upd[pos+1] -= qx_upd[pos];
+                    div_upd[pos] += qx_upd[pos];
+                }
+                if(row != dimY-1) {
+                    /* y-direction: forward neighbour is dimX elements further (next row) */
+                    div_upd[pos+dimX] -= qy_upd[pos];
+                    div_upd[pos] += qy_upd[pos];
+                }
+            }
+        }
+    }
+    return *div_upd;
+}
diff --git a/src/Core/regularisers_CPU/TNV_core.h b/src/Core/regularisers_CPU/TNV_core.h
new file mode 100644
index 0000000..aa050a4
--- /dev/null
+++ b/src/Core/regularisers_CPU/TNV_core.h
@@ -0,0 +1,47 @@
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+#define fTiny 0.00000001f   /* threshold: proxF only divides by a norm or singular value larger than this */
+#define fLarge 100000000.0f /* large starting value (initial residual, initial sum of the projection loop) */
+#define INFNORM -1          /* value of p that selects the INFNORM branch of proxF */
+
+#define MAX(i,j) ((i)<(j) ? (j):(i))   /* function-like macro: the selected argument is evaluated twice */
+#define MIN(i,j) ((i)<(j) ? (i):(j))   /* function-like macro: the selected argument is evaluated twice */
+
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float TNV_CPU_main(float *Input, float *u, float lambda, int maxIter, float tol, int dimX, int dimY, int dimZ); /* main loop; the result is written to u */
+
+/*float PDHG(float *A, float *B, float tau, float sigma, float theta, float lambda, int p, int q, int r, float tol, int maxIter, int d_c, int d_w, int d_h);*/
+CCPI_EXPORT float proxG(float *u_upd, float *v, float *f, float taulambda, long dimX, long dimY, long dimZ); /* u_upd = (v + taulambda*f)/(1 + taulambda) */
+CCPI_EXPORT float gradient(float *u_upd, float *gradx_upd, float *grady_upd, long dimX, long dimY, long dimZ); /* forward differences inside each dimX*dimY slice */
+CCPI_EXPORT float proxF(float *gx, float *gy, float *vx, float *vy, float sigma, int p, int q, int r, long dimX, long dimY, long dimZ); /* per-pixel shrinkage of (vx, vy) into (gx, gy) */
+CCPI_EXPORT float divergence(float *qx_upd, float *qy_upd, float *div_upd, long dimX, long dimY, long dimZ); /* accumulates into div_upd; TNV_CPU_main zeroes it before the call */
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/src/Core/regularisers_CPU/utils.c b/src/Core/regularisers_CPU/utils.c
new file mode 100644
index 0000000..7a4e80b
--- /dev/null
+++ b/src/Core/regularisers_CPU/utils.c
@@ -0,0 +1,117 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "utils.h"
+#include <math.h>
+
+/* Copy a float image/volume: all dimX*dimY*dimZ elements of A are written to U; returns U[0] */
+float copyIm(float *A, float *U, long dimX, long dimY, long dimZ)
+{
+	long idx, total = dimX*dimY*dimZ;
+#pragma omp parallel for shared(A, U) private(idx)
+	for (idx = 0; idx < total; idx++)  U[idx] = A[idx];
+	return *U;
+}
+
+/* Copy an unsigned char image/volume: all dimX*dimY*dimZ elements of A are written to U; returns U[0] */
+unsigned char copyIm_unchar(unsigned char *A, unsigned char *U, int dimX, int dimY, int dimZ)
+{
+	int idx, total = dimX*dimY*dimZ;
+#pragma omp parallel for shared(A, U) private(idx)
+	for (idx = 0; idx < total; idx++)  U[idx] = A[idx];
+	return *U;
+}
+
+/* Roll the rows of the dimX*dimY image A into U with wrap-around and return U[0].
+ * switcher == 0: row j of U is row j+roll_value of A; otherwise it is row j-roll_value of A. */
+float copyIm_roll(float *A, float *U, int dimX, int dimY, int roll_value, int switcher)
+{
+    int col, row, src;
+#pragma omp parallel for shared(U, A) private(col, row, src)
+    for (col=0; col<dimX; col++) {
+        for (row=0; row<dimY; row++) {
+            /* choose the source row; the second alternative wraps around the image edge */
+            if (switcher == 0)
+                src = (row < (dimY - roll_value)) ? row + roll_value : row - (dimY - roll_value);
+            else
+                src = (row < roll_value) ? row + (dimY - roll_value) : row - roll_value;
+            U[row*dimX + col] = A[src*dimX + col];
+        }
+    }
+    return *U;
+}
+
+/* Discrete total-variation energy of the 2D image U; the value is stored in E_val[0] and returned.
+ * type == 1:  2*lambda*sum|\nabla u| + sum (u - u0)^2
+ * type == 2:  2*lambda*sum|\nabla u|
+ * any other type leaves E_val[0] as it was */
+float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int type, int dimX, int dimY)
+{
+	int col, row, col_next, row_next, pos;
+	float diff_row_sq, diff_col_sq, grad_energy=0.0f, data_energy=0.0f;
+
+	for(row=0; row<dimY; row++) {
+		for(col=0; col<dimX; col++) {
+			pos = row*dimX+col;
+			/* forward neighbours; the last row/column points at itself (zero difference) */
+			col_next = (col == dimX-1) ? col : col + 1;
+			row_next = (row == dimY-1) ? row : row + 1;
+
+			/* squared forward differences towards the next row and the next column */
+			diff_row_sq = powf((float)(U[row_next*dimX + col] - U[pos]),2);
+			diff_col_sq = powf((float)(U[row*dimX + col_next] - U[pos]),2);
+
+			grad_energy += 2.0f*lambda*sqrtf((float)(diff_row_sq) + (float)(diff_col_sq)); /* gradient term energy */
+			data_energy += powf((float)(U[pos]-U0[pos]),2); /* fidelity term energy */
+		}
+	}
+	if (type == 1) E_val[0] = grad_energy + data_energy;
+	else if (type == 2) E_val[0] = grad_energy;
+	return *E_val;
+}
+
+float TV_energy3D(float *U, float *U0, float *E_val, float lambda, int type, int dimX, int dimY, int dimZ)
+{
+	/* Discrete total-variation energy of the volume U, stored in E_val[0] and returned:
+	 * type == 1 adds the squared distance to U0 to the gradient term, type == 2 keeps the
+	 * gradient term only; any other type leaves E_val[0] unchanged. */
+	long col, row, sl, col_next, row_next, sl_next, pos;
+	float d_row_sq, d_col_sq, d_sl_sq, grad_energy=0.0f, data_energy=0.0f;
+
+	for(row=0; row<(long)(dimY); row++) {
+		for(col=0; col<(long)(dimX); col++) {
+			for(sl=0; sl<(long)(dimZ); sl++) {
+				pos = (dimX*dimY)*sl + row*dimX+col;
+				/* forward neighbours; at the far faces the index points at itself */
+				col_next = (col == (long)(dimX-1)) ? col : col + 1;
+				row_next = (row == (long)(dimY-1)) ? row : row + 1;
+				sl_next = (sl == (long)(dimZ-1)) ? sl : sl + 1;
+
+				d_row_sq = powf((float)(U[(dimX*dimY)*sl + row_next*dimX+col] - U[pos]),2); /* next row */
+				d_col_sq = powf((float)(U[(dimX*dimY)*sl + row*dimX+col_next] - U[pos]),2); /* next column */
+				d_sl_sq = powf((float)(U[(dimX*dimY)*sl_next + row*dimX+col] - U[pos]),2); /* next slice */
+				grad_energy += 2.0f*lambda*sqrtf((float)(d_row_sq) + (float)(d_col_sq) + (float)(d_sl_sq)); /* gradient term energy */
+				data_energy += (powf((float)(U[pos]-U0[pos]),2)); /* fidelity term energy */
+			}
+		}
+	}
+	if (type == 1) E_val[0] = grad_energy + data_energy;
+	else if (type == 2) E_val[0] = grad_energy;
+	return *E_val;
+}
diff --git a/src/Core/regularisers_CPU/utils.h b/src/Core/regularisers_CPU/utils.h
new file mode 100644
index 0000000..cfaf6d7
--- /dev/null
+++ b/src/Core/regularisers_CPU/utils.h
@@ -0,0 +1,34 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <stdlib.h>
+#include <memory.h>
+#include "CCPiDefines.h"
+#include "omp.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float copyIm(float *A, float *U, long dimX, long dimY, long dimZ); /* copies A into U (source first, destination second) */
+CCPI_EXPORT unsigned char copyIm_unchar(unsigned char *A, unsigned char *U, int dimX, int dimY, int dimZ); /* copies A into U */
+CCPI_EXPORT float copyIm_roll(float *A, float *U, int dimX, int dimY, int roll_value, int switcher); /* copies A into U with the rows rolled by roll_value */
+CCPI_EXPORT float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int type, int dimX, int dimY); /* result is stored in E_val[0] and returned */
+CCPI_EXPORT float TV_energy3D(float *U, float *U0, float *E_val, float lambda, int type, int dimX, int dimY, int dimZ); /* result is stored in E_val[0] and returned */
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu b/src/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
new file mode 100644
index 0000000..a4dbe70
--- /dev/null
+++ b/src/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
@@ -0,0 +1,268 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ 
+
+#include "Diffus_4thO_GPU_core.h"
+#include "shared.h"
+
+/* CUDA implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
+ * The minimisation is performed using explicit scheme. 
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume 
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma)
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended 
+ * 5. tau - time-marching step for explicit scheme
+ *
+ * Output:
+ * [1] Regularized image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
+ */
+
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+    
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+#define EPS 1.0e-7
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+__global__ void Weighted_Laplc2D_kernel(float *W_Lapl, float *U0, float sigma, int dimX, int dimY)
+{
+		int i1,i2,j1,j2;
+		float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq;
+    /* Each thread handles the pixel (i, j) derived from its block/thread indices and writes W_Lapl[index] from U0 */
+		int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        
+        int index = i + dimX*j;
+        /* threads that fall outside the dimX x dimY image do nothing */
+        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
+            /* NOTE(review): with dimX == 1 or dimY == 1 the reflected index below becomes -1 - confirm callers never pass that */
+            /* boundary conditions (Neumann reflections) */
+			i1 = i+1; if (i1 == dimX) i1 = i-1;
+			i2 = i-1; if (i2 < 0) i2 = i+1;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            j2 = j-1; if (j2 < 0) j2 = j+1;
+                /* halved differences of the two neighbours along each axis, and their squares */
+				gradX = 0.5f*(U0[j*dimX+i2] - U0[j*dimX+i1]);
+				gradX_sq = powf(gradX,2);
+				
+				gradY = 0.5f*(U0[j2*dimX+i] - U0[j1*dimX+i]);
+                gradY_sq = powf(gradY,2);
+                /* second differences along each axis */
+                gradXX = U0[j*dimX+i2] + U0[j*dimX+i1] - 2*U0[index];
+                gradYY = U0[j2*dimX+i] + U0[j1*dimX+i] - 2*U0[index];
+                /* mixed difference built from the four diagonal neighbours */
+                gradXY = 0.25f*(U0[j2*dimX+i2] + U0[j1*dimX+i1] - U0[j1*dimX+i2] - U0[j2*dimX+i1]);
+                xy_2 = 2.0f*gradX*gradY*gradXY;
+                
+                denom =  gradX_sq + gradY_sq;
+                /* when denom is at most EPS, EPS replaces it as the divisor; EPS has no f suffix, so that division is done in double */
+                if (denom <= EPS) {
+                    V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/EPS;
+                    V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/EPS; 
+                    }
+                else  {
+                    V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/denom;
+                    V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/denom;  
+                    }
+                /* weight c = 1/(1 + denom/sigma): equals 1 where denom is 0 and shrinks as denom grows */
+                c = 1.0f/(1.0f + denom/sigma);
+                c_sq = c*c;
+                
+                W_Lapl[index] = c_sq*V_norm + c*V_orth;
+		}
+	return;
+} 
+
+__global__ void Diffusion_update_step2D_kernel(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, int dimX, int dimY)
+{
+	int i1,i2,j1,j2;
+    float gradXXc, gradYYc;
+    /* Each thread updates Output[index] in place for its own pixel (i, j); sigmaPar2 is not referenced in this kernel */
+		int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        
+        int index = i + dimX*j;
+        /* threads that fall outside the dimX x dimY image do nothing */
+        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
+            /* NOTE(review): with dimX == 1 or dimY == 1 the reflected index below becomes -1 - confirm callers never pass that */
+            /* boundary conditions (Neumann reflections) */
+			i1 = i+1; if (i1 == dimX) i1 = i-1;
+			i2 = i-1; if (i2 < 0) i2 = i+1;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            j2 = j-1; if (j2 < 0) j2 = j+1;
+					/* second differences of W_Lapl along each axis */
+                    gradXXc = W_Lapl[j*dimX+i2] + W_Lapl[j*dimX+i1] - 2*W_Lapl[index];
+                    gradYYc = W_Lapl[j2*dimX+i] + W_Lapl[j1*dimX+i] - 2*W_Lapl[index];
+                    /* step of size tau: -lambdaPar times the summed second differences, minus the distance of Output from Input */
+                    Output[index] += tau*(-lambdaPar*(gradXXc + gradYYc) - (Output[index] - Input[index]));
+		}
+	return;
+} 
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+__global__ void Weighted_Laplc3D_kernel(float *W_Lapl, float *U0, float sigma, int dimX, int dimY, int dimZ)
+{
+		int i1,i2,j1,j2,k1,k2;
+		float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2;
+		/* Fills W_Lapl with a weighted Laplacian of the volume U0; each thread handles the voxel (i,j,k) given by its block/thread indices. */
+		int i = blockDim.x * blockIdx.x + threadIdx.x;
+		int j = blockDim.y * blockIdx.y + threadIdx.y;
+		int k = blockDim.z * blockIdx.z + threadIdx.z;
+		/* threads whose (i,j,k) falls outside dimX x dimY x dimZ do nothing */
+		if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+		    /* i1/j1/k1 are the +1 neighbours, i2/j2/k2 the -1 neighbours; at a face the index is mirrored back inside */
+		    /* boundary conditions (Neumann reflections) */
+			i1 = i+1; if (i1 == dimX) i1 = i-1;
+			i2 = i-1; if (i2 < 0) i2 = i+1;
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            j2 = j-1; if (j2 < 0) j2 = j+1;
+			k1 = k+1; if (k1 == dimZ) k1 = k-1;
+			k2 = k-1; if (k2 < 0) k2 = k+1;
+				/* linear offset of voxel (i,j,k): i varies fastest, then j, then k */
+				int index = (dimX*dimY)*k + j*dimX+i;
+				/* first differences: half of (value at the -1 neighbour minus value at the +1 neighbour) along each axis, and their squares */
+				gradX = 0.5f*(U0[(dimX*dimY)*k + j*dimX+i2] - U0[(dimX*dimY)*k + j*dimX+i1]);
+				gradX_sq = pow(gradX,2);
+				
+				gradY = 0.5f*(U0[(dimX*dimY)*k + j2*dimX+i] - U0[(dimX*dimY)*k + j1*dimX+i]);
+                gradY_sq = pow(gradY,2);
+                
+                gradZ = 0.5f*(U0[(dimX*dimY)*k2 + j*dimX+i] - U0[(dimX*dimY)*k1 + j*dimX+i]);
+                gradZ_sq = pow(gradZ,2);
+                /* second differences along each axis (neighbour + neighbour - 2*centre) */
+                gradXX = U0[(dimX*dimY)*k + j*dimX+i2] + U0[(dimX*dimY)*k + j*dimX+i1] - 2*U0[index];
+                gradYY = U0[(dimX*dimY)*k + j2*dimX+i] + U0[(dimX*dimY)*k + j1*dimX+i] - 2*U0[index];
+                gradZZ = U0[(dimX*dimY)*k2 + j*dimX+i] + U0[(dimX*dimY)*k1 + j*dimX+i] - 2*U0[index];
+                /* mixed second differences from the four diagonal neighbours in the ij, ik and jk planes */
+                gradXY = 0.25f*(U0[(dimX*dimY)*k + j2*dimX+i2] + U0[(dimX*dimY)*k + j1*dimX+i1] - U0[(dimX*dimY)*k + j1*dimX+i2] - U0[(dimX*dimY)*k + j2*dimX+i1]);
+                gradXZ = 0.25f*(U0[(dimX*dimY)*k2 + j*dimX+i2] - U0[(dimX*dimY)*k2+j*dimX+i1] - U0[(dimX*dimY)*k1+j*dimX+i2] + U0[(dimX*dimY)*k1+j*dimX+i1]);
+                gradYZ = 0.25f*(U0[(dimX*dimY)*k2 +j2*dimX+i] - U0[(dimX*dimY)*k2+j1*dimX+i] - U0[(dimX*dimY)*k1+j2*dimX+i] + U0[(dimX*dimY)*k1+j1*dimX+i]);
+                /* cross terms: 2 * first difference * first difference * matching mixed difference */
+                xy_2  = 2.0f*gradX*gradY*gradXY;
+                xyz_1 = 2.0f*gradX*gradZ*gradXZ;
+                xyz_2 = 2.0f*gradY*gradZ*gradYZ;
+                /* squared magnitude of the first-difference vector */
+                denom =  gradX_sq + gradY_sq + gradZ_sq;
+                /* divide by denom, or by EPS when denom <= EPS (EPS is defined outside this block; presumably a small positive constant) */
+					if (denom <= EPS) {
+					V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/EPS;
+                    V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/EPS;
+					}
+					else  {
+					V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/denom;
+                    V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/denom;
+					}
+                /* weight c = 1/(1 + denom/sigma); the host wrapper in this file passes sigmaPar squared as sigma */
+                c = 1.0f/(1.0f + denom/sigma);
+                c_sq = c*c;
+                /* output: c^2 weights V_norm and c weights V_orth */
+            W_Lapl[index] = c_sq*V_norm + c*V_orth;
+		}
+	return;
+}
+__global__ void Diffusion_update_step3D_kernel(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, int dimX, int dimY, int dimZ)
+{
+    /* One explicit time step of the fourth-order diffusion on a 3D volume: the voxel owned by
+     * this thread moves by tau * (-lambdaPar * Laplacian(W_Lapl) - (Output - Input)).
+     * sigmaPar2 is part of the signature but is not read here. */
+    const int x = blockDim.x * blockIdx.x + threadIdx.x;
+    const int y = blockDim.y * blockIdx.y + threadIdx.y;
+    const int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads that map outside the volume have nothing to update */
+    if ((x < 0) || (x >= dimX) || (y < 0) || (y >= dimY) || (z < 0) || (z >= dimZ)) return;
+
+    /* neighbour coordinates, mirrored back inside at the faces (Neumann reflections) */
+    const int xNext = (x + 1 == dimX) ? x - 1 : x + 1;
+    const int xPrev = (x - 1 < 0) ? x + 1 : x - 1;
+    const int yNext = (y + 1 == dimY) ? y - 1 : y + 1;
+    const int yPrev = (y - 1 < 0) ? y + 1 : y - 1;
+    const int zNext = (z + 1 == dimZ) ? z - 1 : z + 1;
+    const int zPrev = (z - 1 < 0) ? z + 1 : z - 1;
+
+    const int slice = dimX*dimY; /* number of voxels in one z-plane */
+    const int centre = slice*z + y*dimX + x;
+
+    const float lapX = W_Lapl[slice*z + y*dimX + xPrev] + W_Lapl[slice*z + y*dimX + xNext] - 2*W_Lapl[centre];
+    const float lapY = W_Lapl[slice*z + yPrev*dimX + x] + W_Lapl[slice*z + yNext*dimX + x] - 2*W_Lapl[centre];
+    const float lapZ = W_Lapl[slice*zPrev + y*dimX + x] + W_Lapl[slice*zNext + y*dimX + x] - 2*W_Lapl[centre];
+
+    Output[centre] += tau*(-lambdaPar*(lapX + lapY + lapZ) - (Output[centre] - Input[centre]));
+}
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/********************* MAIN HOST FUNCTION ******************/
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+extern "C" int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z)
+{
+    /* Host entry point of the fourth-order diffusion regulariser. Input and Output are host arrays of
+     * N*M*Z floats (N is passed to the kernels as dimX, M as dimY, Z as dimZ). Runs iterationsNumb explicit
+     * steps on CUDA device 0 and returns 0; every CUDA call goes through the CHECK macro. */
+		int dimTotal, dev = 0;
+		CHECK(cudaSetDevice(dev));
+        float *d_input, *d_output, *d_W_Lapl;
+        float sigmaPar2 = sigmaPar*sigmaPar; /* the kernels receive the squared edge parameter */
+        if (Z == 0) {Z = 1;} /* treat a missing third dimension as one slice, so the buffers and the launch grid are never empty */
+        dimTotal = N*M*Z;
+        CHECK(cudaMalloc((void**)&d_input,dimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&d_output,dimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&d_W_Lapl,dimTotal*sizeof(float)));
+        /* the running estimate d_output starts as a copy of the input */
+        CHECK(cudaMemcpy(d_input,Input,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        CHECK(cudaMemcpy(d_output,Input,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
+	if (Z == 1) {
+	     /*2D case */
+        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+        dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
+        for(int n=0; n < iterationsNumb; n++) {
+				/* Calculating weighted Laplacian */
+				Weighted_Laplc2D_kernel<<<dimGrid,dimBlock>>>(d_W_Lapl, d_output, sigmaPar2, N, M);
+				CHECK(cudaDeviceSynchronize());
+				/* Perform iteration step */
+				Diffusion_update_step2D_kernel<<<dimGrid,dimBlock>>>(d_output, d_input, d_W_Lapl, lambdaPar, sigmaPar2, tau, N, M);
+				CHECK(cudaDeviceSynchronize());
+        }
+	}
+	else {
+		/*3D case*/
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKZSIZE));
+			for(int n=0; n < iterationsNumb; n++) {
+				/* Calculating weighted Laplacian */
+				Weighted_Laplc3D_kernel<<<dimGrid,dimBlock>>>(d_W_Lapl, d_output, sigmaPar2, N, M, Z);
+				CHECK(cudaDeviceSynchronize());
+				/* Perform iteration step */
+				Diffusion_update_step3D_kernel<<<dimGrid,dimBlock>>>(d_output, d_input, d_W_Lapl, lambdaPar, sigmaPar2, tau, N, M, Z);
+				CHECK(cudaDeviceSynchronize());
+			}
+		}
+        CHECK(cudaMemcpy(Output,d_output,dimTotal*sizeof(float),cudaMemcpyDeviceToHost));
+        CHECK(cudaFree(d_input));
+        CHECK(cudaFree(d_output));
+        CHECK(cudaFree(d_W_Lapl));
+        return 0;
+}
diff --git a/src/Core/regularisers_GPU/Diffus_4thO_GPU_core.h b/src/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
new file mode 100644
index 0000000..77d5d79
--- /dev/null
+++ b/src/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __Diff_4thO_GPU_H__
+#define __Diff_4thO_GPU_H__
+#include "CCPiDefines.h"
+#include <stdio.h>
+/* Runs iterationsNumb steps of GPU fourth-order diffusion on the N*M*Z floats in Input (host memory) and writes the result to Output; returns 0. */
+extern "C" CCPI_EXPORT int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
+
+#endif 
diff --git a/src/Core/regularisers_GPU/LLT_ROF_GPU_core.cu b/src/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
new file mode 100644
index 0000000..87871be
--- /dev/null
+++ b/src/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
@@ -0,0 +1,473 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ 
+
+#include "LLT_ROF_GPU_core.h"
+#include "shared.h"
+
+/* CUDA implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
+ * 
+* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well. 
+* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase 
+* lambdaLLT starting with smaller values. 
+*
+* Input Parameters:
+* 1. U0 - original noisy image/volume
+* 2. lambdaROF - ROF-related regularisation parameter
+* 3. lambdaLLT - LLT-related regularisation parameter
+* 4. tau - time-marching step 
+* 5. iter - iterations number (for both models)
+*
+* Output:
+* Filtered/regularised image
+*
+* References: 
+* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
+* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+*/
+
+#define BLKXSIZE 8   /* thread-block extents used by the host function for the 3D launches */
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+    /* thread-block extents used by the host function for the 2D launches */
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+
+/* small constants added to denominators: EPS_LLT in the LLT kernels, EPS_ROF under the square root in the ROF kernels (both are double literals) */
+#define EPS_LLT 0.01
+#define EPS_ROF 1.0e-12
+/* integer division rounded up (for positive operands); sizes the launch grid so that it covers the whole image */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+/* NOTE: function-like macros - each argument may be evaluated twice, so pass side-effect-free expressions */
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+__host__ __device__ int signLLT (float x)
+{   /* sign of x as an integer: +1 when x > 0, -1 when x < 0, otherwise 0 */
+        return (x > 0) ? 1 : ((x < 0) ? -1 : 0);
+}
+   
+/*************************************************************************/
+/**********************LLT-related functions *****************************/
+/*************************************************************************/
+__global__ void der2D_LLT_kernel(float *U, float *D1, float *D2, int dimX, int dimY)
+    {
+        /* LLT step (2D): for the pixel owned by this thread, store in D1 and D2 the second
+         * differences of U along the two axes, each divided by (|difference| + EPS_LLT). */
+        const int col = blockDim.x * blockIdx.x + threadIdx.x;
+        const int row = blockDim.y * blockIdx.y + threadIdx.y;
+
+        /* threads outside the image do nothing */
+        if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY)) return;
+
+        /* symmetric boundary conditions (Neumann): mirror the neighbour at an edge */
+        const int colNext = (col + 1 == dimX) ? col - 1 : col + 1;
+        const int colPrev = (col - 1 < 0) ? col + 1 : col - 1;
+        const int rowNext = (row + 1 == dimY) ? row - 1 : row + 1;
+        const int rowPrev = (row - 1 < 0) ? row + 1 : row - 1;
+        const int centre = col + dimX*row;
+
+        /* second-order central differences */
+        const float d2Col = U[row*dimX+colNext] - 2.0f*U[centre] + U[row*dimX+colPrev];
+        const float d2Row = U[rowNext*dimX+col] - 2.0f*U[centre] + U[rowPrev*dimX+col];
+
+        /* denominators are stored as float before dividing, exactly one rounding step each */
+        const float scaleCol = abs(d2Col) + EPS_LLT;
+        const float scaleRow = abs(d2Row) + EPS_LLT;
+        D1[centre] = d2Col / scaleCol;
+        D2[centre] = d2Row / scaleRow;
+	}
+	
+__global__ void der3D_LLT_kernel(float* U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ)
+    {
+        /* LLT step (3D): for the voxel owned by this thread, store in D1, D2 and D3 the second
+         * differences of U along the three axes, each divided by (|difference| + EPS_LLT). */
+        const int x = blockDim.x * blockIdx.x + threadIdx.x;
+        const int y = blockDim.y * blockIdx.y + threadIdx.y;
+        const int z = blockDim.z * blockIdx.z + threadIdx.z;
+        /* threads outside the volume do nothing */
+        if ((x < 0) || (x >= dimX) || (y < 0) || (y >= dimY) || (z < 0) || (z >= dimZ)) return;
+
+        /* symmetric boundary conditions (Neumann): mirror the neighbour at a face */
+        const int xNext = (x + 1 == dimX) ? x - 1 : x + 1;
+        const int xPrev = (x - 1 < 0) ? x + 1 : x - 1;
+        const int yNext = (y + 1 == dimY) ? y - 1 : y + 1;
+        const int yPrev = (y - 1 < 0) ? y + 1 : y - 1;
+        const int zNext = (z + 1 == dimZ) ? z - 1 : z + 1;
+        const int zPrev = (z - 1 < 0) ? z + 1 : z - 1;
+
+        const int slice = dimX*dimY; /* number of voxels in one z-plane */
+        const int centre = slice*z + y*dimX + x;
+
+        /* second-order central differences */
+        const float d2x = U[slice*z + y*dimX + xNext] - 2.0f*U[centre] + U[slice*z + y*dimX + xPrev];
+        const float d2y = U[slice*z + yNext*dimX + x] - 2.0f*U[centre] + U[slice*z + yPrev*dimX + x];
+        const float d2z = U[slice*zNext + y*dimX + x] - 2.0f*U[centre] + U[slice*zPrev + y*dimX + x];
+
+        /* denominators are stored as float before dividing, exactly one rounding step each */
+        const float scaleX = abs(d2x) + EPS_LLT;
+        const float scaleY = abs(d2y) + EPS_LLT;
+        const float scaleZ = abs(d2z) + EPS_LLT;
+        D1[centre] = d2x / scaleX;
+        D2[centre] = d2y / scaleY;
+        D3[centre] = d2z / scaleZ;
+	}
+
+/*************************************************************************/
+/**********************ROF-related functions *****************************/
+/*************************************************************************/
+
+/* ROF first-order differences 1 (2D): D1 = forward difference towards (i, j+1), divided by a regularised gradient magnitude */
+__global__ void D1_func2D_ROF_kernel(float* Input, float* D1, int N, int M)
+    {
+		int i1, j1, i2;
+		float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1;
+		int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        /* one thread per pixel; linear offset with i varying fastest */
+        int index = i + N*j;        
+        /* threads outside the N x M image do nothing */
+        if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) {
+            
+            /* boundary conditions (Neumann reflections) */
+                i1 = i + 1; if (i1 >= N) i1 = i-1;
+                i2 = i - 1; if (i2 < 0) i2 = i+1;
+                j1 = j + 1; if (j1 >= M) j1 = j-1;
+		
+		     /* Forward-backward differences */
+                NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */
+                NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */                
+                NOMy_0 = Input[index] - Input[j*N + i2]; /* y- */
+                /* denom2 is the squared minmod of the forward/backward differences in i: zero when their signs differ, else the smaller magnitude */
+                denom1 = NOMx_1*NOMx_1;
+                denom2 = 0.5f*(signLLT((float)NOMy_1) + signLLT((float)NOMy_0))*(MIN(abs((float)NOMy_1),abs((float)NOMy_0)));
+                denom2 = denom2*denom2;
+                T1 = sqrt(denom1 + denom2 + EPS_ROF);
+                D1[index] = NOMx_1/T1;
+		}		
+	}
+	
+/* ROF first-order differences 2 (2D): D2 = forward difference towards (i+1, j), divided by a regularised gradient magnitude */
+__global__ void D2_func2D_ROF_kernel(float* Input, float* D2, int N, int M)      
+    {
+		int i1, j1, j2;
+		float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2;
+		int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        /* one thread per pixel; linear offset with i varying fastest */
+        int index = i + N*j;        
+        /* threads outside the N x M image do nothing */
+        if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) {
+            
+            /* boundary conditions (Neumann reflections) */
+                i1 = i + 1; if (i1 >= N) i1 = i-1;
+                j1 = j + 1; if (j1 >= M) j1 = j-1;
+                j2 = j - 1; if (j2 < 0) j2 = j+1; 
+		
+                /* Forward-backward differences */
+                NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */
+                NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */
+                NOMx_0 = Input[index] - Input[j2*N + i]; /* x- */
+                /* denom2 is the squared minmod of the forward/backward differences in j: zero when their signs differ, else the smaller magnitude */
+                denom1 = NOMy_1*NOMy_1;
+                denom2 = 0.5f*(signLLT((float)NOMx_1) + signLLT((float)NOMx_0))*(MIN(abs((float)NOMx_1),abs((float)NOMx_0)));
+                denom2 = denom2*denom2;
+                T2 = sqrt(denom1 + denom2 + EPS_ROF);
+                D2[index] = NOMy_1/T2;	
+		}		
+	}
+
+ 
+    /* ROF differences 1 (3D): D1 = forward difference towards (i, j+1, k), divided by a regularised gradient magnitude */
+__global__ void D1_func3D_ROF_kernel(float* Input, float* D1, int dimX, int dimY, int dimZ)      
+    {
+		float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
+		int i1,i2,k1,j1,k2;
+		/* one thread per voxel (i,j,k) */
+		int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        int k = blockDim.z * blockIdx.z + threadIdx.z;
+        
+      	int index = (dimX*dimY)*k + j*dimX+i;
+        
+        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+            
+                    /* symmetric boundary conditions (Neumann) */
+                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+                    i2 = i - 1; if (i2 < 0) i2 = i+1;
+                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+                    /* the j-1 neighbour is not used by this kernel, so it is not computed */
+                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+                    k2 = k - 1; if (k2 < 0) k2 = k+1;
+                    
+                    /* Forward-backward differences */
+                    NOMx_1 = Input[(dimX*dimY)*k + j1*dimX + i] - Input[index]; /* x+ */
+                    NOMy_1 = Input[(dimX*dimY)*k + j*dimX + i1] - Input[index]; /* y+ */                    
+                    NOMy_0 = Input[index] - Input[(dimX*dimY)*k + j*dimX + i2]; /* y- */
+                    
+                    NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+                    NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
+                    
+                    /* denom2/denom3: squared minmod of the forward/backward pair (zero when signs differ, else the smaller magnitude); 0.5f keeps the product in single precision */
+                    denom1 = NOMx_1*NOMx_1;
+                    denom2 = 0.5f*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0)));
+                    denom2 = denom2*denom2;
+                    denom3 = 0.5f*(signLLT(NOMz_1) + signLLT(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0)));
+                    denom3 = denom3*denom3;
+                    T1 = sqrt(denom1 + denom2 + denom3 + EPS_ROF);
+                    D1[index] = NOMx_1/T1;	
+		}		
+	}      
+
+    /* ROF differences 2 (3D): D2 = forward difference towards (i+1, j, k), divided by a regularised gradient magnitude */
+    __global__ void D2_func3D_ROF_kernel(float* Input, float* D2, int dimX, int dimY, int dimZ)      
+    {
+		float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
+		int i1,k1,j1,j2,k2;
+		/* one thread per voxel (i,j,k) */
+		int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        int k = blockDim.z * blockIdx.z + threadIdx.z;
+        
+      	int index = (dimX*dimY)*k + j*dimX+i;     
+        
+        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+                    /* symmetric boundary conditions (Neumann) */
+                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+                    /* the i-1 neighbour is not used by this kernel, so it is not computed */
+                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+                    j2 = j - 1; if (j2 < 0) j2 = j+1;
+                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+                    k2 = k - 1; if (k2 < 0) k2 = k+1;
+                    
+                    
+                    /* Forward-backward differences */
+                    NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
+                    NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
+                    NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+                    NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+                    NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
+                    
+                    /* denom2/denom3: squared minmod of the forward/backward pair (zero when signs differ, else the smaller magnitude); 0.5f keeps the product in single precision */
+                    denom1 = NOMy_1*NOMy_1;
+                    denom2 = 0.5f*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0)));
+                    denom2 = denom2*denom2;
+                    denom3 = 0.5f*(signLLT(NOMz_1) + signLLT(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0)));
+                    denom3 = denom3*denom3;
+                    T2 = sqrt(denom1 + denom2 + denom3 + EPS_ROF);
+                    D2[index] = NOMy_1/T2;
+		}
+	}
+	
+	  /* ROF differences 3 (3D): D3 = forward difference towards (i, j, k+1), divided by a regularised gradient magnitude */
+    __global__ void D3_func3D_ROF_kernel(float* Input, float* D3, int dimX, int dimY, int dimZ)      
+    {
+		float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
+		int i1,i2,k1,j1,j2;
+		/* one thread per voxel (i,j,k) */
+		int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        int k = blockDim.z * blockIdx.z + threadIdx.z;
+        
+      	int index = (dimX*dimY)*k + j*dimX+i;     
+        
+        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+                /* symmetric boundary conditions (Neumann) */
+				i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+                i2 = i - 1; if (i2 < 0) i2 = i+1;
+                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+                j2 = j - 1; if (j2 < 0) j2 = j+1;
+                k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+                /* the k-1 neighbour is not used by this kernel, so it is not computed */
+                
+                /* Forward-backward differences */
+                NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
+                NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
+                NOMy_0 = Input[index] - Input[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */
+                NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+                NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+                /* denom2/denom3: squared minmod of the forward/backward pair (zero when signs differ, else the smaller magnitude); 0.5f keeps the product in single precision */
+                denom1 = NOMz_1*NOMz_1;
+                denom2 = 0.5f*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0)));
+                denom2 = denom2*denom2;
+                denom3 = 0.5f*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0)));
+                denom3 = denom3*denom3;
+                T3 = sqrt(denom1 + denom2 + denom3 + EPS_ROF);
+                D3[index] = NOMz_1/T3;
+		}
+	}
+/*************************************************************************/
+/**********************ROF-LLT-related functions *************************/
+/*************************************************************************/
+
+__global__ void Update2D_LLT_ROF_kernel(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D1_ROF, float *D2_ROF, float lambdaROF, float lambdaLLT, float tau, int dimX, int dimY)
+{
+    /* Joint ROF + LLT explicit update (2D). The pixel of U owned by this thread moves by
+     * tau * (2*lambdaROF*div - lambdaLLT*lap - (U - U0)), where
+     *   lap is the sum of second differences of D1_LLT and D2_LLT, and
+     *   div is the sum of backward differences of D1_ROF and D2_ROF.
+     * U0 is only read; U is updated in place. */
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* threads outside the image do nothing */
+    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY)) return;
+
+    /* symmetric boundary conditions (Neumann): mirror the neighbour at an edge */
+    const int colNext = (col + 1 == dimX) ? col - 1 : col + 1;
+    const int colPrev = (col - 1 < 0) ? col + 1 : col - 1;
+    const int rowNext = (row + 1 == dimY) ? row - 1 : row + 1;
+    const int rowPrev = (row - 1 < 0) ? row + 1 : row - 1;
+
+    const int centre = row*dimX + col;
+
+    /* LLT-related part: Laplacian built from the second differences */
+    const float d2Col = D1_LLT[row*dimX+colNext] - 2.0f*D1_LLT[centre] + D1_LLT[row*dimX+colPrev];
+    const float d2Row = D2_LLT[rowNext*dimX+col] - 2.0f*D2_LLT[centre] + D2_LLT[rowPrev*dimX+col];
+    const float laplacian = d2Col + d2Row;
+
+    /* ROF-related part: divergence built from the backward differences */
+    const float backRow = D1_ROF[centre] - D1_ROF[rowPrev*dimX + col];
+    const float backCol = D2_ROF[centre] - D2_ROF[row*dimX + colPrev];
+    const float divergence = backRow + backCol;
+
+    /* combine both regularisers with the data-fidelity term */
+    U[centre] += tau*(2.0f*lambdaROF*(divergence) - lambdaLLT*(laplacian) - (U[centre] - U0[centre]));
+}
+
+__global__ void Update3D_LLT_ROF_kernel(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D3_LLT, float *D1_ROF, float *D2_ROF, float *D3_ROF, float lambdaROF, float lambdaLLT, float tau, int dimX, int dimY, int dimZ)
+{
+    /* Joint ROF + LLT explicit update (3D). The voxel of U owned by this thread moves by
+     * tau * (2*lambdaROF*div - lambdaLLT*lap - (U - U0)); lap sums second differences of the
+     * D*_LLT fields, div sums backward differences of the D*_ROF fields. U0 is only read. */
+    const int x = blockDim.x * blockIdx.x + threadIdx.x;
+    const int y = blockDim.y * blockIdx.y + threadIdx.y;
+    const int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads outside the volume do nothing */
+    if ((x < 0) || (x >= dimX) || (y < 0) || (y >= dimY) || (z < 0) || (z >= dimZ)) return;
+
+    /* symmetric boundary conditions (Neumann): mirror the neighbour at a face */
+    const int xNext = (x + 1 == dimX) ? x - 1 : x + 1;
+    const int xPrev = (x - 1 < 0) ? x + 1 : x - 1;
+    const int yNext = (y + 1 == dimY) ? y - 1 : y + 1;
+    const int yPrev = (y - 1 < 0) ? y + 1 : y - 1;
+    const int zNext = (z + 1 == dimZ) ? z - 1 : z + 1;
+    const int zPrev = (z - 1 < 0) ? z + 1 : z - 1;
+
+    const int slice = dimX*dimY; /* number of voxels in one z-plane */
+    const int centre = slice*z + y*dimX + x;
+
+    /* LLT-related part: Laplacian built from the second differences */
+    const float d2x = D1_LLT[slice*z + y*dimX + xNext] - 2.0f*D1_LLT[centre] + D1_LLT[slice*z + y*dimX + xPrev];
+    const float d2y = D2_LLT[slice*z + yNext*dimX + x] - 2.0f*D2_LLT[centre] + D2_LLT[slice*z + yPrev*dimX + x];
+    const float d2z = D3_LLT[slice*zNext + y*dimX + x] - 2.0f*D3_LLT[centre] + D3_LLT[slice*zPrev + y*dimX + x];
+    const float laplacian = d2x + d2y + d2z;
+
+    /* ROF-related part: divergence built from the backward differences */
+    const float backY = D1_ROF[centre] - D1_ROF[slice*z + yPrev*dimX + x];
+    const float backX = D2_ROF[centre] - D2_ROF[slice*z + y*dimX + xPrev];
+    const float backZ = D3_ROF[centre] - D3_ROF[slice*zPrev + y*dimX + x];
+    const float divergence = backY + backX + backZ;
+    /* combine both regularisers with the data-fidelity term */
+    U[centre] += tau*(2.0f*lambdaROF*(divergence) - lambdaLLT*(laplacian) - (U[centre] - U0[centre]));
+}
+
+/*******************************************************************/
+/************************ HOST FUNCTION ****************************/
+/*******************************************************************/
+
+extern "C" int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z)
+{
+	    /* Host driver of the combined ROF-TV + LLT regulariser: uploads Input (N*M*Z floats), runs iterationsNumb explicit
+	     * updates on CUDA device 0 and downloads the result into Output. Z of 0 or 1 selects the 2D kernels. Returns 0. */
+		int dev = 0;
+		int DimTotal;
+		if (Z == 0) {Z = 1;} /* normalise BEFORE sizing the buffers: with Z == 0 the product below would be 0 while the 2D kernels still write N*M values */
+		DimTotal = N*M*Z;
+		CHECK(cudaSetDevice(dev));
+        float *d_input, *d_update;
+        float *D1_LLT=NULL, *D2_LLT=NULL, *D3_LLT=NULL, *D1_ROF=NULL, *D2_ROF=NULL, *D3_ROF=NULL;
+        
+        CHECK(cudaMalloc((void**)&d_input,DimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&d_update,DimTotal*sizeof(float)));
+        
+        CHECK(cudaMalloc((void**)&D1_LLT,DimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&D2_LLT,DimTotal*sizeof(float)));
+        if (Z != 1) { CHECK(cudaMalloc((void**)&D3_LLT,DimTotal*sizeof(float))); } /* third-axis buffers are only touched by the 3D kernels */
+        
+        CHECK(cudaMalloc((void**)&D1_ROF,DimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&D2_ROF,DimTotal*sizeof(float)));
+        if (Z != 1) { CHECK(cudaMalloc((void**)&D3_ROF,DimTotal*sizeof(float))); }
+        
+        CHECK(cudaMemcpy(d_input,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        CHECK(cudaMemcpy(d_update,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        
+    if (Z == 1) {
+			// TV - 2D case
+            dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+            dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
+             
+            for(int n=0; n < iterationsNumb; n++) {
+                /****************ROF******************/
+				/* calculate first-order differences */
+                D1_func2D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D1_ROF, N, M);
+                CHECK(cudaDeviceSynchronize());
+				D2_func2D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D2_ROF, N, M);
+                CHECK(cudaDeviceSynchronize());                
+                /****************LLT******************/
+                 /* estimate second-order derivatives */
+				der2D_LLT_kernel<<<dimGrid,dimBlock>>>(d_update, D1_LLT, D2_LLT, N, M);
+				/* Joint update for ROF and LLT models (launched on the same stream as the kernel above, so it runs after it) */
+				Update2D_LLT_ROF_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, D1_LLT, D2_LLT, D1_ROF, D2_ROF, lambdaROF, lambdaLLT, tau, N, M);
+                CHECK(cudaDeviceSynchronize());
+            }
+    }
+    else {
+			// 3D case
+            dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+            dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKZSIZE)); /* z extent must be divided by the z block size */
+           
+            for(int n=0; n < iterationsNumb; n++) {
+                /****************ROF******************/
+				/* calculate first-order differences */
+                D1_func3D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D1_ROF, N, M, Z);
+                CHECK(cudaDeviceSynchronize());
+				D2_func3D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D2_ROF, N, M, Z);
+                CHECK(cudaDeviceSynchronize());        
+                D3_func3D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D3_ROF, N, M, Z);
+                CHECK(cudaDeviceSynchronize());        
+                /****************LLT******************/
+                 /* estimate second-order derivatives */
+				der3D_LLT_kernel<<<dimGrid,dimBlock>>>(d_update, D1_LLT, D2_LLT, D3_LLT, N, M, Z);
+				/* Joint update for ROF and LLT models (launched on the same stream as the kernel above, so it runs after it) */
+				Update3D_LLT_ROF_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, D1_LLT, D2_LLT, D3_LLT, D1_ROF, D2_ROF, D3_ROF, lambdaROF, lambdaLLT, tau, N, M, Z);
+                CHECK(cudaDeviceSynchronize());
+            }
+    }        
+        CHECK(cudaMemcpy(Output,d_update,DimTotal*sizeof(float),cudaMemcpyDeviceToHost));
+        CHECK(cudaFree(d_input));
+        CHECK(cudaFree(d_update));
+        CHECK(cudaFree(D1_LLT));
+        CHECK(cudaFree(D2_LLT));
+        CHECK(cudaFree(D3_LLT)); /* still NULL in the 2D case; cudaFree on a null pointer performs no operation */
+        CHECK(cudaFree(D1_ROF));
+        CHECK(cudaFree(D2_ROF));
+        CHECK(cudaFree(D3_ROF));
+        return 0;
+}
diff --git a/src/Core/regularisers_GPU/LLT_ROF_GPU_core.h b/src/Core/regularisers_GPU/LLT_ROF_GPU_core.h
new file mode 100644
index 0000000..a6bfcc7
--- /dev/null
+++ b/src/Core/regularisers_GPU/LLT_ROF_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __ROFLLTGPU_H__
+#define __ROFLLTGPU_H__
+#include "CCPiDefines.h"
+#include <stdio.h>
+/* Runs iterationsNumb GPU iterations of the combined ROF-TV + LLT regulariser on the floats in Input (host memory) and writes the result to Output; returns 0. */
+extern "C" CCPI_EXPORT int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
+
+#endif 
diff --git a/src/Core/regularisers_GPU/NonlDiff_GPU_core.cu b/src/Core/regularisers_GPU/NonlDiff_GPU_core.cu
new file mode 100644
index 0000000..ff7ce4d
--- /dev/null
+++ b/src/Core/regularisers_GPU/NonlDiff_GPU_core.cu
@@ -0,0 +1,345 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ 
+
+#include "NonlDiff_GPU_core.h"
+#include "shared.h"
+
+/* CUDA implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
+ * The minimisation is performed using explicit scheme. 
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume 
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended 
+ * 5. tau - time-marching step for explicit scheme
+ * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
+ *
+ * Output:
+ * [1] Regularized image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+
+#define BLKXSIZE 8 /* thread-block extents (x, y, z) of the 3D kernel launches */
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+    
+#define BLKXSIZE2D 16 /* thread-block extents (x, y) of the 2D kernel launches */
+#define BLKYSIZE2D 16
+#define EPS 1.0e-5
+    
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) /* a/b rounded up; sizes the launch grid */
+
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+__host__ __device__ int signNDF (float x)
+{   /* sign of x as -1, 0 or +1; a NaN fails both comparisons and yields 0 */
+        return (x > 0) ? 1 : ((x < 0) ? -1 : 0);
+}
+   
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+__global__ void LinearDiff2D_kernel(float *Input, float *Output, float lambdaPar, float tau, int N, int M)
+    {
+        /* One explicit time step of linear diffusion for pixel (i,j) of the N x M image,
+         * addressed as [j*N + i]: Output is advanced in place by tau times (lambdaPar times the
+         * sum of the four neighbour differences, minus the deviation of Output from Input). */
+        const int i = blockDim.x * blockIdx.x + threadIdx.x;
+        const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+        /* threads that fall outside the image have nothing to do */
+        if ((i < 0) || (i >= N) || (j < 0) || (j >= M)) return;
+
+        const int index = i + N*j;
+        const float centre = Output[index];
+
+        /* boundary conditions (Neumann reflections) */
+        const int iE = (i+1 == N) ? i-1 : i+1;
+        const int iW = (i-1 < 0)  ? i+1 : i-1;
+        const int jN = (j+1 == M) ? j-1 : j+1;
+        const int jS = (j-1 < 0)  ? j+1 : j-1;
+
+        /* differences between the four neighbours and the centre value */
+        const float dE = Output[j*N+iE] - centre;
+        const float dW = Output[j*N+iW] - centre;
+        const float dN = Output[jN*N+i] - centre;
+        const float dS = Output[jS*N+i] - centre;
+
+        /* NOTE(review): neighbour values are read from the same array that other threads of this
+         * launch write, so a neighbour may already hold its updated value - confirm this is intended */
+        Output[index] += tau*(lambdaPar*(dE + dW + dN + dS) - (centre - Input[index]));
+    }
+    
+ __global__ void NonLinearDiff2D_kernel(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, int N, int M)
+    {   /* one explicit step of nonlinear diffusion for pixel (i,j); Output is updated in place */
+		int i1,i2,j1,j2;
+		float e,w,n,s,e1,w1,n1,s1;
+		int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        /* element (i,j) of the N x M image is addressed as [i + N*j] */
+        int index = i + N*j;
+        
+        if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) {
+            
+            /* boundary conditions (Neumann reflections) */
+			i1 = i+1; if (i1 == N) i1 = i-1;
+			i2 = i-1; if (i2 < 0) i2 = i+1;
+            j1 = j+1; if (j1 == M) j1 = j-1;
+            j2 = j-1; if (j2 < 0) j2 = j+1;
+            /* values of the four neighbours */
+		        e = Output[j*N+i1];
+                w = Output[j*N+i2];
+                n = Output[j1*N+i];
+                s = Output[j2*N+i];
+                /* differences between each neighbour and the current pixel */
+                e1 = e - Output[index];
+                w1 = w - Output[index];
+                n1 = n - Output[index];
+                s1 = s - Output[index];
+            /* reshape each difference with the penalty selected by penaltytype */
+            if (penaltytype == 1){
+            /* Huber penalty: sign(d) when |d| > sigmaPar, d/sigmaPar otherwise */
+            if (abs(e1) > sigmaPar) e1 =  signNDF(e1);
+            else e1 = e1/sigmaPar;
+            
+            if (abs(w1) > sigmaPar) w1 =  signNDF(w1);
+            else w1 = w1/sigmaPar;
+            
+            if (abs(n1) > sigmaPar) n1 =  signNDF(n1);
+            else n1 = n1/sigmaPar;
+            
+            if (abs(s1) > sigmaPar) s1 =  signNDF(s1);
+            else s1 = s1/sigmaPar;
+            }
+            else if (penaltytype == 2) {
+            /* Perona-Malik: d/(1 + (d/sigmaPar)^2) */
+            e1 = (e1)/(1.0f + pow((e1/sigmaPar),2));
+            w1 = (w1)/(1.0f + pow((w1/sigmaPar),2));
+            n1 = (n1)/(1.0f + pow((n1/sigmaPar),2));
+            s1 = (s1)/(1.0f + pow((s1/sigmaPar),2));
+            }
+            else if (penaltytype == 3) {
+            /* Tukey Biweight: d*(1 - (d/sigmaPar)^2)^2 when |d| <= sigmaPar, 0 otherwise */
+            if (abs(e1) <= sigmaPar) e1 =  e1*pow((1.0f - pow((e1/sigmaPar),2)), 2);
+            else e1 = 0.0f;
+            if (abs(w1) <= sigmaPar) w1 =  w1*pow((1.0f - pow((w1/sigmaPar),2)), 2);
+            else w1 = 0.0f;
+            if (abs(n1) <= sigmaPar) n1 =  n1*pow((1.0f - pow((n1/sigmaPar),2)), 2);
+            else n1 = 0.0f;
+            if (abs(s1) <= sigmaPar) s1 =  s1*pow((1.0f - pow((s1/sigmaPar),2)), 2);
+            else s1 = 0.0f;
+            }
+            else printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+            /* an unrecognised penaltytype only prints the message above: the differences are then used as they are */
+            Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index])); 
+		}
+	} 
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+
+__global__ void LinearDiff3D_kernel(float *Input, float *Output, float lambdaPar, float tau, int N, int M, int Z)
+    {
+        /* One explicit time step of linear diffusion for voxel (i,j,k) of the N x M x Z volume,
+         * addressed as [(N*M)*k + i + N*j]: Output is advanced in place by tau times
+         * (lambdaPar times the sum of the six neighbour differences, minus the deviation
+         * of Output from Input). */
+        const int i = blockDim.x * blockIdx.x + threadIdx.x;
+        const int j = blockDim.y * blockIdx.y + threadIdx.y;
+        const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+        /* threads that fall outside the volume have nothing to do */
+        if ((i < 0) || (i >= N) || (j < 0) || (j >= M) ||
+            (k < 0) || (k >= Z)) return;
+
+        const int index = (N*M)*k + i + N*j;
+        const float centre = Output[index];
+
+        /* boundary conditions (Neumann reflections) */
+        const int iE = (i+1 == N) ? i-1 : i+1;
+        const int iW = (i-1 < 0)  ? i+1 : i-1;
+        const int jN = (j+1 == M) ? j-1 : j+1;
+        const int jS = (j-1 < 0)  ? j+1 : j-1;
+        const int kU = (k+1 == Z) ? k-1 : k+1;
+        const int kD = (k-1 < 0)  ? k+1 : k-1;
+
+        /* differences between the six neighbours and the centre value */
+        const float dE = Output[(N*M)*k + iE + N*j] - centre;
+        const float dW = Output[(N*M)*k + iW + N*j] - centre;
+        const float dN = Output[(N*M)*k + i + N*jN] - centre;
+        const float dS = Output[(N*M)*k + i + N*jS] - centre;
+        const float dU = Output[(N*M)*kU + i + N*j] - centre;
+        const float dD = Output[(N*M)*kD + i + N*j] - centre;
+
+        /* NOTE(review): neighbour values are read from the same array that other threads of this
+         * launch write, so a neighbour may already hold its updated value - confirm this is intended */
+        Output[index] += tau*(lambdaPar*(dE + dW + dN + dS + dU + dD) - (centre - Input[index]));
+    }
+
+__global__ void NonLinearDiff3D_kernel(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, int N, int M, int Z)
+    {   /* one explicit step of nonlinear diffusion for voxel (i,j,k); Output is updated in place */
+		int i1,i2,j1,j2,k1,k2;
+		float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
+		int i = blockDim.x * blockIdx.x + threadIdx.x;
+		int j = blockDim.y * blockIdx.y + threadIdx.y;
+		int k = blockDim.z * blockIdx.z + threadIdx.z;
+        /* element (i,j,k) of the N x M x Z volume is addressed as [(N*M)*k + i + N*j] */
+		int index = (N*M)*k + i + N*j;        
+        
+        if ((i >= 0) && (i < N) && (j >= 0) && (j < M) && (k >= 0) && (k < Z)) {
+            
+            /* boundary conditions (Neumann reflections) */
+			i1 = i+1; if (i1 == N) i1 = i-1;
+			i2 = i-1; if (i2 < 0) i2 = i+1;
+            j1 = j+1; if (j1 == M) j1 = j-1;
+            j2 = j-1; if (j2 < 0) j2 = j+1;
+			k1 = k+1; if (k1 == Z) k1 = k-1;
+			k2 = k-1; if (k2 < 0) k2 = k+1;
+            /* values of the six neighbours */
+		        e = Output[(N*M)*k + i1 + N*j];
+                w = Output[(N*M)*k + i2 + N*j];
+                n = Output[(N*M)*k + i + N*j1];
+                s = Output[(N*M)*k + i + N*j2];
+                u = Output[(N*M)*k1 + i + N*j];
+                d = Output[(N*M)*k2 + i + N*j];
+                /* differences between each neighbour and the current voxel */
+                e1 = e - Output[index];
+                w1 = w - Output[index];
+                n1 = n - Output[index];
+                s1 = s - Output[index];
+                u1 = u - Output[index];
+                d1 = d - Output[index];
+                
+            /* reshape each difference with the penalty selected by penaltytype */
+            if (penaltytype == 1){
+            /* Huber penalty: sign(d) when |d| > sigmaPar, d/sigmaPar otherwise */
+            if (abs(e1) > sigmaPar) e1 =  signNDF(e1);
+            else e1 = e1/sigmaPar;
+            
+            if (abs(w1) > sigmaPar) w1 =  signNDF(w1);
+            else w1 = w1/sigmaPar;
+            
+            if (abs(n1) > sigmaPar) n1 =  signNDF(n1);
+            else n1 = n1/sigmaPar;
+            
+            if (abs(s1) > sigmaPar) s1 =  signNDF(s1);
+            else s1 = s1/sigmaPar;
+            
+            if (abs(u1) > sigmaPar) u1 =  signNDF(u1);
+            else u1 = u1/sigmaPar;
+            
+            if (abs(d1) > sigmaPar) d1 =  signNDF(d1);
+            else d1 = d1/sigmaPar;            
+            }
+            else if (penaltytype == 2) {
+            /* Perona-Malik: d/(1 + (d/sigmaPar)^2) */
+            e1 = (e1)/(1.0f + pow((e1/sigmaPar),2));
+            w1 = (w1)/(1.0f + pow((w1/sigmaPar),2));
+            n1 = (n1)/(1.0f + pow((n1/sigmaPar),2));
+            s1 = (s1)/(1.0f + pow((s1/sigmaPar),2));
+            u1 = (u1)/(1.0f + pow((u1/sigmaPar),2));
+            d1 = (d1)/(1.0f + pow((d1/sigmaPar),2));
+            }
+            else if (penaltytype == 3) {
+            /* Tukey Biweight: d*(1 - (d/sigmaPar)^2)^2 when |d| <= sigmaPar, 0 otherwise */
+            if (abs(e1) <= sigmaPar) e1 =  e1*pow((1.0f - pow((e1/sigmaPar),2)), 2);
+            else e1 = 0.0f;
+            if (abs(w1) <= sigmaPar) w1 =  w1*pow((1.0f - pow((w1/sigmaPar),2)), 2);
+            else w1 = 0.0f;
+            if (abs(n1) <= sigmaPar) n1 =  n1*pow((1.0f - pow((n1/sigmaPar),2)), 2);
+            else n1 = 0.0f;
+            if (abs(s1) <= sigmaPar) s1 =  s1*pow((1.0f - pow((s1/sigmaPar),2)), 2);
+            else s1 = 0.0f;
+            if (abs(u1) <= sigmaPar) u1 =  u1*pow((1.0f - pow((u1/sigmaPar),2)), 2);
+            else u1 = 0.0f;
+            if (abs(d1) <= sigmaPar) d1 =  d1*pow((1.0f - pow((d1/sigmaPar),2)), 2);
+            else d1 = 0.0f;
+            }
+            else printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+            /* an unrecognised penaltytype only prints the message above: the differences are then used as they are */
+            Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index])); 
+		}
+	} 
+
+/////////////////////////////////////////////////
+// HOST FUNCTION
+extern "C" int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z)
+{
+    /* Host driver: uploads Input, runs iterationsNumb explicit diffusion steps on the device
+     * (2D kernels when Z == 1, 3D kernels otherwise; the linear kernels when sigmaPar == 0)
+     * and downloads the result into Output. Returns 0. */
+    // set up device
+    int dev = 0;
+    CHECK(cudaSetDevice(dev));
+    float *d_input, *d_output;
+    /* the nonlinear kernels receive sigmaPar divided by sqrt(2) */
+    float sigmaPar2 = sigmaPar/sqrt(2.0f);
+    /* byte count evaluated once in size_t: N*M*Z*sizeof(float) forms the element product in int first */
+    const size_t volBytes = (size_t)N*(size_t)M*(size_t)Z*sizeof(float);
+
+    CHECK(cudaMalloc((void**)&d_input,volBytes));
+    CHECK(cudaMalloc((void**)&d_output,volBytes));
+
+    /* both device buffers start from Input; the kernels read d_input and update d_output in place */
+    CHECK(cudaMemcpy(d_input,Input,volBytes,cudaMemcpyHostToDevice));
+    CHECK(cudaMemcpy(d_output,Input,volBytes,cudaMemcpyHostToDevice));
+
+    if (Z == 1) {
+        /*2D case */
+        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+        dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
+        for(int n=0; n < iterationsNumb; n++) {
+            if (sigmaPar == 0.0f) {
+                /* linear diffusion (heat equation) */
+                LinearDiff2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_output, lambdaPar, tau, N, M);
+            }
+            else {
+                /* nonlinear diffusion */
+                NonLinearDiff2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_output, lambdaPar, sigmaPar2, tau, penaltytype, N, M);
+            }
+            CHECK(cudaDeviceSynchronize());
+        }
+    }
+    else {
+        /*3D case*/
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKZSIZE));
+        for(int n=0; n < iterationsNumb; n++) {
+            if (sigmaPar == 0.0f) {
+                /* linear diffusion (heat equation) */
+                LinearDiff3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_output, lambdaPar, tau, N, M, Z);
+            }
+            else {
+                /* nonlinear diffusion */
+                NonLinearDiff3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_output, lambdaPar, sigmaPar2, tau, penaltytype, N, M, Z);
+            }
+            CHECK(cudaDeviceSynchronize());
+        }
+    }
+    CHECK(cudaMemcpy(Output,d_output,volBytes,cudaMemcpyDeviceToHost));
+    CHECK(cudaFree(d_input));
+    CHECK(cudaFree(d_output));
+    return 0;
+}
diff --git a/src/Core/regularisers_GPU/NonlDiff_GPU_core.h b/src/Core/regularisers_GPU/NonlDiff_GPU_core.h
new file mode 100644
index 0000000..5fe457e
--- /dev/null
+++ b/src/Core/regularisers_GPU/NonlDiff_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __NonlDiffGPU_H__
+#define __NonlDiffGPU_H__
+#include "CCPiDefines.h"
+#include <stdio.h>
+/* Exported C-linkage declaration of the host entry point NonlDiff_GPU_main */
+extern "C" CCPI_EXPORT int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
+
+#endif 
diff --git a/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu
new file mode 100644
index 0000000..98c8488
--- /dev/null
+++ b/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu
@@ -0,0 +1,460 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd. 
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PatchSelect_GPU_core.h"
+#include "shared.h"
+
+/* CUDA implementation of non-local weight pre-calculation for non-local priors
+ * Weights and associated indices are stored into pre-allocated arrays and passed
+ * to the regulariser
+ *
+ *
+ * Input Parameters:
+ * 1. 2D grayscale image (classical 3D version will not be supported but rather 2D + dim extension (TODO))
+ * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
+ * 3. Similarity window (half-size of the patch window, e.g. 2)
+ * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
+ * 5. noise-related parameter to calculate non-local weights
+ *
+ * Output [2D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. Weights_ij - associated weights
+ */
+
+
+#define BLKXSIZE 16 /* thread-block extent in x */
+#define BLKYSIZE 16 /* thread-block extent in y */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) /* a/b rounded up; sizes the launch grid */
+#define M_PI 3.14159265358979323846
+#define EPS 1.0e-8 /* the kernels skip candidates whose patch distance does not exceed EPS */
+#define CONSTVECSIZE5 121 /* (2*5+1)^2: per-thread scratch entries for SearchWindow 5 */
+#define CONSTVECSIZE7 225 /* (2*7+1)^2: per-thread scratch entries for SearchWindow 7 */
+#define CONSTVECSIZE9 361 /* (2*9+1)^2: per-thread scratch entries for SearchWindow 9 */
+#define CONSTVECSIZE11 529 /* (2*11+1)^2: per-thread scratch entries for SearchWindow 11 */
+#define CONSTVECSIZE13 729 /* (2*13+1)^2: per-thread scratch entries for SearchWindow 13 */
+
+__device__ void swap(float *xp, float *yp) 
+{   /* exchange the two floats that xp and yp point to */
+    const float held = *yp;
+    *yp = *xp;
+    *xp = held;
+}
+__device__ void swapUS(unsigned short *xp, unsigned short *yp) 
+{   /* exchange the two unsigned shorts that xp and yp point to */
+    const unsigned short held = *yp;
+    *yp = *xp;
+    *xp = held;
+}
+
+/********************************************************************************/
+__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+    /* One thread per pixel (i,j) of the N x M image Ad, addressed as Ad[i*M + j]: every in-bounds
+     * pixel of the search window gets the weight exp(-d/h2), d being the Eucl_Vec_d-weighted squared
+     * distance between the two patches; the NumNeighb largest weights and their pixel indices are
+     * written to Weights_d / H_i_d / H_j_d, one N*M plane per neighbour. SearchW_full is not used. */
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+    float Weight_Vec[CONSTVECSIZE5];
+    unsigned short ind_i[CONSTVECSIZE5];
+    unsigned short ind_j[CONSTVECSIZE5];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    /* the grid is rounded up to whole blocks, so surplus threads must leave before touching memory */
+    if ((i >= N) || (j >= M)) return;
+    long index = i*M+j;
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
+                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                                counterG++;
+                            }}
+                     }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = __expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* sort to choose the most prominent weights [HIGH to LOW], re-arranging the indices accordingly */
+    for (x = 0; x < counter-1; x++)  {
+       for (y = 0; y < counter-x-1; y++)  {
+           if (Weight_Vec[y] < Weight_Vec[y+1]) {
+            swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+            swapUS(&ind_i[y], &ind_i[y+1]);
+            swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /* store the NumNeighb most prominent candidates; slots beyond the counter candidates found get index 0 / weight 0 */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = (N*M*x) + index;
+        H_i_d[index2] = (x < counter) ? ind_i[x] : 0;
+        H_j_d[index2] = (x < counter) ? ind_j[x] : 0;
+        Weights_d[index2] = (x < counter) ? Weight_Vec[x] : 0.0f;
+    }
+}
+/********************************************************************************/
+__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+    /* One thread per pixel (i,j) of the N x M image Ad, addressed as Ad[i*M + j]: every in-bounds
+     * pixel of the search window gets the weight exp(-d/h2), d being the Eucl_Vec_d-weighted squared
+     * distance between the two patches; the NumNeighb largest weights and their pixel indices are
+     * written to Weights_d / H_i_d / H_j_d, one N*M plane per neighbour. SearchW_full is not used. */
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+    float Weight_Vec[CONSTVECSIZE7];
+    unsigned short ind_i[CONSTVECSIZE7];
+    unsigned short ind_j[CONSTVECSIZE7];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    /* the grid is rounded up to whole blocks, so surplus threads must leave before touching memory */
+    if ((i >= N) || (j >= M)) return;
+    long index = i*M+j;
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
+                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                                counterG++;
+                            }}
+                     }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = __expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* sort to choose the most prominent weights [HIGH to LOW], re-arranging the indices accordingly */
+    for (x = 0; x < counter-1; x++)  {
+       for (y = 0; y < counter-x-1; y++)  {
+           if (Weight_Vec[y] < Weight_Vec[y+1]) {
+            swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+            swapUS(&ind_i[y], &ind_i[y+1]);
+            swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /* store the NumNeighb most prominent candidates; slots beyond the counter candidates found get index 0 / weight 0 */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = (N*M*x) + index;
+        H_i_d[index2] = (x < counter) ? ind_i[x] : 0;
+        H_j_d[index2] = (x < counter) ? ind_j[x] : 0;
+        Weights_d[index2] = (x < counter) ? Weight_Vec[x] : 0.0f;
+    }
+}
+__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+    /* One thread per pixel (i,j) of the N x M image Ad, addressed as Ad[i*M + j]: every in-bounds
+     * pixel of the search window gets the weight exp(-d/h2), d being the Eucl_Vec_d-weighted squared
+     * distance between the two patches; the NumNeighb largest weights and their pixel indices are
+     * written to Weights_d / H_i_d / H_j_d, one N*M plane per neighbour. SearchW_full is not used. */
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+    float Weight_Vec[CONSTVECSIZE9];
+    unsigned short ind_i[CONSTVECSIZE9];
+    unsigned short ind_j[CONSTVECSIZE9];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    /* the grid is rounded up to whole blocks, so surplus threads must leave before touching memory */
+    if ((i >= N) || (j >= M)) return;
+    long index = i*M+j;
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
+                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                                counterG++;
+                            }}
+                     }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* sort to choose the most prominent weights [HIGH to LOW], re-arranging the indices accordingly */
+    for (x = 0; x < counter-1; x++)  {
+       for (y = 0; y < counter-x-1; y++)  {
+           if (Weight_Vec[y] < Weight_Vec[y+1]) {
+            swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+            swapUS(&ind_i[y], &ind_i[y+1]);
+            swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /* store the NumNeighb most prominent candidates; slots beyond the counter candidates found get index 0 / weight 0 */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = (N*M*x) + index;
+        H_i_d[index2] = (x < counter) ? ind_i[x] : 0;
+        H_j_d[index2] = (x < counter) ? ind_j[x] : 0;
+        Weights_d[index2] = (x < counter) ? Weight_Vec[x] : 0.0f;
+    }
+}
+__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+    /* One thread per pixel (i,j) of the N x M image Ad, addressed as Ad[i*M + j]: every in-bounds
+     * pixel of the search window gets the weight exp(-d/h2), d being the Eucl_Vec_d-weighted squared
+     * distance between the two patches; the NumNeighb largest weights and their pixel indices are
+     * written to Weights_d / H_i_d / H_j_d, one N*M plane per neighbour. SearchW_full is not used. */
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+    float Weight_Vec[CONSTVECSIZE11];
+    unsigned short ind_i[CONSTVECSIZE11];
+    unsigned short ind_j[CONSTVECSIZE11];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    /* the grid is rounded up to whole blocks, so surplus threads must leave before touching memory */
+    if ((i >= N) || (j >= M)) return;
+    long index = i*M+j;
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
+                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                                counterG++;
+                            }}
+                     }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = __expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* sort to choose the most prominent weights [HIGH to LOW], re-arranging the indices accordingly */
+    for (x = 0; x < counter-1; x++)  {
+       for (y = 0; y < counter-x-1; y++)  {
+           if (Weight_Vec[y] < Weight_Vec[y+1]) {
+            swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+            swapUS(&ind_i[y], &ind_i[y+1]);
+            swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /* store the NumNeighb most prominent candidates; slots beyond the counter candidates found get index 0 / weight 0 */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = (N*M*x) + index;
+        H_i_d[index2] = (x < counter) ? ind_i[x] : 0;
+        H_j_d[index2] = (x < counter) ? ind_j[x] : 0;
+        Weights_d[index2] = (x < counter) ? Weight_Vec[x] : 0.0f;
+    }
+}
+__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+    /* One thread per pixel (i,j) of the N x M image Ad, addressed as Ad[i*M + j]: every in-bounds
+     * pixel of the search window gets the weight exp(-d/h2), d being the Eucl_Vec_d-weighted squared
+     * distance between the two patches; the NumNeighb largest weights and their pixel indices are
+     * written to Weights_d / H_i_d / H_j_d, one N*M plane per neighbour. SearchW_full is not used. */
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+    float Weight_Vec[CONSTVECSIZE13];
+    unsigned short ind_i[CONSTVECSIZE13];
+    unsigned short ind_j[CONSTVECSIZE13];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    /* the grid is rounded up to whole blocks, so surplus threads must leave before touching memory */
+    if ((i >= N) || (j >= M)) return;
+    long index = i*M+j;
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) {
+                            if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                                normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                                counterG++;
+                            }}
+                     }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = __expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* sort to choose the most prominent weights [HIGH to LOW], re-arranging the indices accordingly */
+    for (x = 0; x < counter-1; x++)  {
+       for (y = 0; y < counter-x-1; y++)  {
+           if (Weight_Vec[y] < Weight_Vec[y+1]) {
+            swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+            swapUS(&ind_i[y], &ind_i[y+1]);
+            swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /* store the NumNeighb most prominent candidates; slots beyond the counter candidates found get index 0 / weight 0 */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = (N*M*x) + index;
+        H_i_d[index2] = (x < counter) ? ind_i[x] : 0;
+        H_j_d[index2] = (x < counter) ? ind_j[x] : 0;
+        Weights_d[index2] = (x < counter) ? Weight_Vec[x] : 0.0f;
+    }
+}
+
+   
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/********************* MAIN HOST FUNCTION ******************/
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
+{
+    /* Host driver: uploads the N x M image A, launches the IndexSelect2D kernel that matches
+     * SearchWindow and downloads NumNeighb planes of neighbour indices (H_i, H_j) and weights (Weights).
+     * Returns 0 on success, -1 when no CUDA device is found or SearchWindow is not 5, 7, 9, 11 or 13. */
+    int deviceCount = -1; // number of devices
+    cudaGetDeviceCount(&deviceCount);
+    if (deviceCount <= 0) { /* <= 0 also rejects the initial -1 should the query not set the count */
+        fprintf(stderr, "No CUDA devices found\n");
+        return -1;
+    }
+    /* reject an unsupported window before anything is allocated, so this error path cannot leak */
+    if ((SearchWindow != 5) && (SearchWindow != 7) && (SearchWindow != 9) && (SearchWindow != 11) && (SearchWindow != 13)) {
+        fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
+        return -1;
+    }
+
+    int SearchW_full, SimilW_full, counterG, i, j;
+    float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d;
+    unsigned short *H_i_d, *H_j_d;
+    h2 = h*h;
+    dim3 dimBlock(BLKXSIZE,BLKYSIZE);
+    dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
+    SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window  size */
+    SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1);   /* the full similarity window  size */
+
+    /* generate a 2D Gaussian kernel for NLM procedure */
+    Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float));
+    counterG = 0;
+    for(i=-SimilarWin; i<=SimilarWin; i++) {
+         for(j=-SimilarWin; j<=SimilarWin; j++) {
+              Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2))/(2.0*SimilarWin*SimilarWin));
+              counterG++;
+    }} /*main neighb loop */
+
+    /*allocate space on the device*/
+    checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );
+    checkCudaErrors( cudaMalloc((void**)&H_i_d, N*M*NumNeighb*sizeof(unsigned short)) );
+    checkCudaErrors( cudaMalloc((void**)&H_j_d, N*M*NumNeighb*sizeof(unsigned short)) );
+    checkCudaErrors( cudaMalloc((void**)&Weights_d, N*M*NumNeighb*sizeof(float)) );
+    checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) );
+
+    /* copy data from the host to the device */
+    checkCudaErrors( cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
+    free(Eucl_Vec); /* cudaMemcpy has returned, so the host-side table is no longer needed */
+
+    /********************** Run CUDA kernel here ********************/
+    if (SearchWindow == 5)  IndexSelect2D_5_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+    else if (SearchWindow == 7)  IndexSelect2D_7_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+    else if (SearchWindow == 9)  IndexSelect2D_9_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+    else if (SearchWindow == 11)  IndexSelect2D_11_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+    else  IndexSelect2D_13_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+    checkCudaErrors(cudaPeekAtLastError() );
+    checkCudaErrors(cudaDeviceSynchronize());
+
+    checkCudaErrors(cudaMemcpy(H_i, H_i_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
+    checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
+    checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) );
+
+    cudaFree(Ad);
+    cudaFree(H_i_d);
+    cudaFree(H_j_d);
+    cudaFree(Weights_d);
+    cudaFree(Eucl_Vec_d);
+    cudaDeviceReset();
+    return 0;
+}
diff --git a/src/Core/regularisers_GPU/PatchSelect_GPU_core.h b/src/Core/regularisers_GPU/PatchSelect_GPU_core.h
new file mode 100644
index 0000000..8c124d3
--- /dev/null
+++ b/src/Core/regularisers_GPU/PatchSelect_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __NLREG_KERNELS_H_
+#define __NLREG_KERNELS_H_
+#include "CCPiDefines.h"
+#include <stdio.h>
+/* C-linkage declaration of the GPU patch-selection entry point (presumably H_i, H_j and Weights are output arrays - TODO confirm against the implementation). */
+extern "C" CCPI_EXPORT int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
+
+#endif /* __NLREG_KERNELS_H_ */
diff --git a/src/Core/regularisers_GPU/TGV_GPU_core.cu b/src/Core/regularisers_GPU/TGV_GPU_core.cu
new file mode 100644
index 0000000..58b2c41
--- /dev/null
+++ b/src/Core/regularisers_GPU/TGV_GPU_core.cu
@@ -0,0 +1,625 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ 
+
+#include "TGV_GPU_core.h"
+#include "shared.h"
+
+/* CUDA implementation of Primal-Dual denoising method for 
+ * Total Generalized Variation (TGV)-L2 model [1] (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume (2D/3D)
+ * 2. lambda - regularisation parameter
+ * 3. parameter to control the first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations
+ * 6. Lipschitz constant (default is 12)
+ *
+ * Output:
+ * Filtered/regularised image 
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ */
+    
+#define BLKXSIZE 8 /* thread-block extent in x for the 3D kernel launches */
+#define BLKYSIZE 8 /* thread-block extent in y for the 3D kernel launches */
+#define BLKZSIZE 8 /* thread-block extent in z for the 3D kernel launches */
+    
+#define BLKXSIZE2D 16 /* thread-block extent in x for the 2D kernel launches */
+#define BLKYSIZE2D 16 /* thread-block extent in y for the 2D kernel launches */
+#define EPS 1.0e-7 /* not referenced by the TGV code in this file */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) /* a/b rounded up; sizes the launch grids */
+
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+/* Updates the dual variable P: P += sigma*((U[next] - U) - V) along x and y, where the
+ * forward neighbour is replaced by the previous pixel on the last column/row. */
+__global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, float sigma)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* threads that fall outside the dimX x dimY image have nothing to do */
+    if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY)) return;
+    const int index = i + dimX*j;
+    /* symmetric boundary conditions (Neumann): step backwards on the last column/row */
+    const int nb_x = (i == dimX-1) ? j*dimX+(i-1) : j*dimX+(i+1);
+    const int nb_y = (j == dimY-1) ? (j-1)*dimX+i : (j+1)*dimX+i;
+    P1[index] += sigma*((U[nb_x] - U[index]) - V1[index]);
+    P2[index] += sigma*((U[nb_y] - U[index]) - V2[index]);
+}
+
+/* Projection step for the dual variable P: where sqrt(P1^2 + P2^2)/alpha1 exceeds 1,
+ * both components are divided by that ratio; other pixels are left untouched. */
+__global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float alpha1)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY)) return;
+
+    const int index = i + dimX*j;
+    /* Euclidean norm of (P1,P2) at this pixel, then scaled by 1/alpha1 */
+    float ratio = sqrt(pow(P1[index],2) + pow(P2[index],2));
+    ratio = ratio/alpha1;
+
+    /* rescale only when the scaled norm is above one */
+    if (ratio > 1.0f) {
+        P1[index] /= ratio;
+        P2[index] /= ratio;
+    }
+}
+
+/* Updates the dual variable Q from forward differences of V = (V1,V2):
+ * Q1 += sigma*dx(V1), Q2 += sigma*dy(V2), Q3 += sigma*0.5*(dx(V2) + dy(V1)).
+ * A difference is taken as zero on the last column (x) or last row (y). */
+__global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float sigma)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* threads that fall outside the dimX x dimY image have nothing to do */
+    if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY)) return;
+
+    const int index = i + dimX*j;
+    const bool has_right = (i != dimX-1);
+    const bool has_below = (j != dimY-1);
+
+    /* forward differences along x */
+    const float dxV1 = has_right ? V1[j*dimX+(i+1)] - V1[index] : 0.0f;
+    const float dxV2 = has_right ? V2[j*dimX+(i+1)] - V2[index] : 0.0f;
+    /* forward differences along y */
+    const float dyV2 = has_below ? V2[(j+1)*dimX+i] - V2[index] : 0.0f;
+    const float dyV1 = has_below ? V1[(j+1)*dimX+i] - V1[index] : 0.0f;
+
+    /* accumulate into the three components of Q */
+    Q1[index] += sigma*(dxV1);
+    Q2[index] += sigma*(dyV2);
+    Q3[index] += sigma*(0.5f*(dxV2 + dyV1));
+}
+
+/* Projection step for the dual variable Q: sqrt(Q1^2 + Q2^2 + 2*Q3^2) is divided by
+ * alpha0 and, where the result exceeds 1, Q1..Q3 are divided by it. */
+__global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int dimY, float alpha0)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY)) return;
+
+    const int index = i + dimX*j;
+    /* the Q3 component enters the norm twice */
+    float ratio = sqrt(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2));
+    ratio = ratio/alpha0;
+
+    if (ratio > 1.0f) {
+        Q1[index] /= ratio;
+        Q2[index] /= ratio;
+        Q3[index] /= ratio;
+    }
+}
+
+/* Primal update of U: forms the backward-difference divergence of (P1,P2) and sets
+ * U = (lambda*(U + tau*div) + tau*U0)/(lambda + tau). On the first column/row the
+ * backward difference reduces to the value of P itself. */
+__global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, int dimX, int dimY, float lambda, float tau)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY)) return;
+
+    const int index = i + dimX*j;
+
+    float div_x = P1[index];
+    if (i != 0) div_x = P1[index] - P1[j*dimX+(i-1)];
+    float div_y = P2[index];
+    if (j != 0) div_y = P2[index] - P2[(j-1)*dimX+i];
+
+    const float div = div_x + div_y;
+    U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
+}
+
+/* Update of V: V += tau*(P + div Q), where div Q is built from backward differences
+ * of Q1, Q2 and the shared component Q3. Differences across the first column (x)
+ * or first row (y) are taken as zero. */
+__global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float tau)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* threads that fall outside the dimX x dimY image have nothing to do */
+    if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY)) return;
+
+    const int index = i + dimX*j;
+    const bool has_left = (i != 0);
+    const bool has_above = (j != 0);
+
+    /* backward differences along x */
+    const float dxQ1 = has_left ? Q1[index] - Q1[j*dimX+(i-1)] : 0.0f;
+    const float dxQ3 = has_left ? Q3[index] - Q3[j*dimX+(i-1)] : 0.0f;
+    /* backward differences along y */
+    const float dyQ2 = has_above ? Q2[index] - Q2[(j-1)*dimX+i] : 0.0f;
+    const float dyQ3 = has_above ? Q3[index] - Q3[(j-1)*dimX+i] : 0.0f;
+
+    const float div1 = dxQ1 + dyQ3;
+    const float div2 = dxQ3 + dyQ2;
+    V1[index] += tau*(P1[index] + div1);
+    V2[index] += tau*(P2[index] + div2);
+}
+
+/* Copies U into U_old, one thread per pixel of the N x M image. */
+__global__ void copyIm_TGV_kernel(float *U, float *U_old, int N, int M, int num_total)
+{
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    const int index = xIndex + N*yIndex;
+
+    /* Guard on the coordinates as well as the flat index: a thread with xIndex >= N
+     * would otherwise wrap into a pixel that belongs to another thread. */
+    if ((xIndex < N) && (yIndex < M) && (index < num_total)) U_old[index] = U[index];
+}
+
+/* Copies the pair (V1,V2) into (V1_old,V2_old), one thread per pixel of the N x M image. */
+__global__ void copyIm_TGV_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total)
+{
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    const int index = xIndex + N*yIndex;
+    /* Reject threads outside the image: with xIndex >= N the flat index aliases a
+     * pixel owned by another thread. */
+    if ((xIndex >= N) || (yIndex >= M) || (index >= num_total)) return;
+    V1_old[index] = V1[index];
+    V2_old[index] = V2[index];
+}
+
+/* Extrapolation step U <- 2*U - U_old, one thread per pixel of the N x M image. */
+__global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total)
+{
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    const int index = xIndex + N*yIndex;
+
+    /* The update is not idempotent, so every pixel must be touched exactly once:
+     * threads with xIndex >= N alias a pixel of a later row and are rejected here. */
+    if ((xIndex < N) && (yIndex < M) && (index < num_total)) U[index] = 2.0f*U[index] - U_old[index];
+}
+
+
+/* Extrapolation step V <- 2*V - V_old for both components, one thread per pixel (N x M). */
+__global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total)
+{
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    const int index = xIndex + N*yIndex;
+    /* The update is not idempotent: threads with xIndex >= N alias a pixel of a later
+     * row and would apply it a second time, so they are rejected here. */
+    if ((xIndex >= N) || (yIndex >= M) || (index >= num_total)) return;
+    V1[index] = 2.0f*V1[index] - V1_old[index];
+    V2[index] = 2.0f*V2[index] - V2_old[index];
+}
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/* 3D update of the dual variable P: P += sigma*((U[next] - U) - V) along x, y and z;
+ * on the last plane of an axis the previous voxel is used instead of the next one. */
+__global__ void DualP_3D_kernel(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY) || (k < 0) || (k >= dimZ)) return;
+
+    const int slice = dimX*dimY;
+    const int index = slice*k + j*dimX+i;
+    const int nb_x = (i == dimX-1) ? slice*k + j*dimX+(i-1) : slice*k + j*dimX+(i+1);
+    const int nb_y = (j == dimY-1) ? slice*k + (j-1)*dimX+i : slice*k + (j+1)*dimX+i;
+    const int nb_z = (k == dimZ-1) ? slice*(k-1) + j*dimX+i : slice*(k+1) + j*dimX+i;
+
+    P1[index] += sigma*((U[nb_x] - U[index]) - V1[index]);
+    P2[index] += sigma*((U[nb_y] - U[index]) - V2[index]);
+    P3[index] += sigma*((U[nb_z] - U[index]) - V3[index]);
+}
+
+/* 3D projection of the dual variable P: where sqrt(P1^2 + P2^2 + P3^2)/alpha1 exceeds 1,
+ * all three components are divided by that ratio. */
+__global__ void ProjP_3D_kernel(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float alpha1)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* nothing to do for threads outside the volume */
+    if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY) || (k < 0) || (k >= dimZ)) return;
+
+    const int index = (dimX*dimY)*k + j*dimX+i;
+    const float ratio = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1;
+
+    /* voxels whose scaled norm is at most one are left unchanged */
+    if (ratio > 1.0f) {
+        P1[index] /= ratio;
+        P2[index] /= ratio;
+        P3[index] /= ratio;
+    }
+}
+
+/* 3D update of the dual variable Q from forward differences of V = (V1,V2,V3).
+ * Q1..Q3 receive the diagonal terms dx(V1), dy(V2), dz(V3); Q4..Q6 receive the
+ * averaged mixed terms (Q4: x/y, Q5: x/z, Q6: y/z). A difference along an axis is
+ * taken as zero on the last plane of that axis. */
+__global__ void DualQ_3D_kernel(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float sigma)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* nothing to do for threads outside the volume */
+    if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY) || (k < 0) || (k >= dimZ)) return;
+
+    const int slice = dimX*dimY;
+    const int index = slice*k + j*dimX+i;
+    const int nb_x = slice*k + j*dimX+(i+1);
+    const int nb_y = slice*k + (j+1)*dimX+i;
+    const int nb_z = slice*(k+1) + j*dimX+i;
+    /* forward differences along x (zero on the last column) */
+    const float dxV1 = (i != dimX-1) ? V1[nb_x] - V1[index] : 0.0f;
+    const float dxV2 = (i != dimX-1) ? V2[nb_x] - V2[index] : 0.0f;
+    const float dxV3 = (i != dimX-1) ? V3[nb_x] - V3[index] : 0.0f;
+    /* forward differences along y (zero on the last row) */
+    const float dyV2 = (j != dimY-1) ? V2[nb_y] - V2[index] : 0.0f;
+    const float dyV1 = (j != dimY-1) ? V1[nb_y] - V1[index] : 0.0f;
+    const float dyV3 = (j != dimY-1) ? V3[nb_y] - V3[index] : 0.0f;
+    /* forward differences along z (zero on the last slice) */
+    const float dzV3 = (k != dimZ-1) ? V3[nb_z] - V3[index] : 0.0f;
+    const float dzV1 = (k != dimZ-1) ? V1[nb_z] - V1[index] : 0.0f;
+    const float dzV2 = (k != dimZ-1) ? V2[nb_z] - V2[index] : 0.0f;
+
+    /* accumulate into the six components of Q */
+    Q1[index] += sigma*(dxV1); /*Q11*/
+    Q2[index] += sigma*(dyV2); /*Q22*/
+    Q3[index] += sigma*(dzV3); /*Q33*/
+    Q4[index] += sigma*(0.5f*(dxV2 + dyV1)); /* Q21 / Q12 */
+    Q5[index] += sigma*(0.5f*(dxV3 + dzV1)); /* Q31 / Q13 */
+    Q6[index] += sigma*(0.5f*(dyV3 + dzV2)); /* Q32 / Q23 */
+}
+
+
+/* 3D projection of the dual variable Q. The norm counts the components Q1..Q3 once
+ * and the components Q4..Q6 twice; where norm/alpha0 exceeds 1, all six components
+ * are divided by that ratio. */
+__global__ void ProjQ_3D_kernel(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float alpha0)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY) || (k < 0) || (k >= dimZ)) return;
+
+    const int index = (dimX*dimY)*k + j*dimX+i;
+
+    float ratio = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2));
+    ratio = ratio/alpha0;
+
+    /* rescale only when the scaled norm is above one */
+    if (ratio > 1.0f) {
+        Q1[index] /= ratio;
+        Q2[index] /= ratio;
+        Q3[index] /= ratio;
+        Q4[index] /= ratio;
+        Q5[index] /= ratio;
+        Q6[index] /= ratio;
+    }
+}
+/* 3D primal update of U: div is the backward-difference divergence of (P1,P2,P3), then
+ * U = (lambda*(U + tau*div) + tau*U0)/(lambda + tau). On the first plane of an axis
+ * the backward difference reduces to the value of P itself. */
+__global__ void DivProjP_3D_kernel(float *U, float *U0, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float lambda, float tau)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY) || (k < 0) || (k >= dimZ)) return;
+
+    const int slice = dimX*dimY;
+    const int index = slice*k + j*dimX+i;
+
+    /* start from P itself and subtract the previous voxel where one exists */
+    float div_x = P1[index];
+    if (i != 0) div_x = P1[index] - P1[slice*k + j*dimX+(i-1)];
+    float div_y = P2[index];
+    if (j != 0) div_y = P2[index] - P2[slice*k + (j-1)*dimX+i];
+    float div_z = P3[index];
+    if (k != 0) div_z = P3[index] - P3[slice*(k-1) + (j)*dimX+i];
+
+    const float div = div_x + div_y + div_z;
+    U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
+}
+/* 3D update of V: V += tau*(P + div Q), with div Q assembled from backward differences
+ * of Q1..Q6 (Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23).
+ * A backward difference along an axis is taken as zero on the first plane of that
+ * axis. */
+__global__ void UpdV_3D_kernel(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float tau)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* nothing to do for threads outside the volume */
+    if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY) || (k < 0) || (k >= dimZ)) return;
+
+    const int slice = dimX*dimY;
+    const int index = slice*k + j*dimX+i;
+    const int prev_x = slice*k + j*dimX+(i-1);
+    const int prev_y = slice*k + (j-1)*dimX+i;
+    const int prev_z = slice*(k-1) + (j)*dimX+i;
+
+    /* backward differences along x (zero on the first column) */
+    const float dxQ1 = (i != 0) ? Q1[index] - Q1[prev_x] : 0.0f;
+    const float dxQ4 = (i != 0) ? Q4[index] - Q4[prev_x] : 0.0f;
+    const float dxQ5 = (i != 0) ? Q5[index] - Q5[prev_x] : 0.0f;
+    /* backward differences along y (zero on the first row) */
+    const float dyQ2 = (j != 0) ? Q2[index] - Q2[prev_y] : 0.0f;
+    const float dyQ4 = (j != 0) ? Q4[index] - Q4[prev_y] : 0.0f;
+    const float dyQ6 = (j != 0) ? Q6[index] - Q6[prev_y] : 0.0f;
+    /* backward differences along z (zero on the first slice) */
+    const float dzQ6 = (k != 0) ? Q6[index] - Q6[prev_z] : 0.0f;
+    const float dzQ5 = (k != 0) ? Q5[index] - Q5[prev_z] : 0.0f;
+    const float dzQ3 = (k != 0) ? Q3[index] - Q3[prev_z] : 0.0f;
+
+    /* one divergence value per component of V */
+    const float div1 = dxQ1 + dyQ4 + dzQ5;
+    const float div2 = dxQ4 + dyQ2 + dzQ6;
+    const float div3 = dxQ5 + dyQ6 + dzQ3;
+
+    V1[index] += tau*(P1[index] + div1);
+    V2[index] += tau*(P2[index] + div2);
+    V3[index] += tau*(P3[index] + div3);
+}
+
+/* Copies U into U_old, one thread per voxel of the dimX x dimY x dimZ volume. */
+__global__ void copyIm_TGV_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+    const int index = (dimX*dimY)*k + j*dimX+i;
+
+    /* Guard on the coordinates as well as the flat index: a thread with i >= dimX
+     * or j >= dimY would otherwise wrap into a voxel owned by another thread. */
+    if ((i >= dimX) || (j >= dimY) || (k >= dimZ) || (index >= num_total)) return;
+
+    U_old[index] = U[index];
+}
+
+/* Copies (V1,V2,V3) into (V1_old,V2_old,V3_old), one thread per voxel of the volume. */
+__global__ void copyIm_TGV_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+    const int index = (dimX*dimY)*k + j*dimX+i;
+
+    /* Guard on the coordinates as well as the flat index: a thread with i >= dimX
+     * or j >= dimY would otherwise wrap into a voxel owned by another thread. */
+    if ((i >= dimX) || (j >= dimY) || (k >= dimZ) || (index >= num_total)) return;
+
+    V1_old[index] = V1[index];
+    V2_old[index] = V2[index];
+    V3_old[index] = V3[index];
+}
+
+/* Extrapolation step U <- 2*U - U_old, one thread per voxel of the volume. */
+__global__ void newU_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+    const int index = (dimX*dimY)*k + j*dimX+i;
+
+    /* The update is not idempotent, so each voxel must be touched exactly once: threads
+     * with i >= dimX or j >= dimY alias a voxel of another row/slice and are rejected. */
+    if ((i >= dimX) || (j >= dimY) || (k >= dimZ) || (index >= num_total)) return;
+
+    U[index] = 2.0f*U[index] - U_old[index];
+}
+
+/* Extrapolation step V <- 2*V - V_old for all three components, one thread per voxel. */
+__global__ void newU_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+    const int index = (dimX*dimY)*k + j*dimX+i;
+
+    /* The update is not idempotent, so each voxel must be touched exactly once: threads
+     * with i >= dimX or j >= dimY alias a voxel of another row/slice and are rejected. */
+    if ((i >= dimX) || (j >= dimY) || (k >= dimZ) || (index >= num_total)) return;
+
+    V1[index] = 2.0f*V1[index] - V1_old[index];
+    V2[index] = 2.0f*V2[index] - V2_old[index];
+    V3[index] = 2.0f*V3[index] - V3_old[index];
+}
+
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/************************ MAIN HOST FUNCTION ***********************/
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/* Host entry point of the TGV denoiser: uploads U0, runs iterationsNumb primal-dual
+ * iterations (2D kernels when dimZ == 1, 3D kernels otherwise) and downloads the result
+ * into U. tau and sigma are both set to 1/sqrt(L2). Returns 0 on completion. */
+extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ)
+{
+    int dimTotal, dev = 0;
+    CHECK(cudaSetDevice(dev));
+    dimTotal = dimX*dimY*dimZ;
+
+    float *U_old, *d_U0, *d_U, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma;
+    tau = pow(L2,-0.5);
+    sigma = pow(L2,-0.5);
+
+    CHECK(cudaMalloc((void**)&d_U0,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&d_U,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&U_old,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&P1,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&P2,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&Q1,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&Q2,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&Q3,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&V1,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&V2,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&V1_old,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&V2_old,dimTotal*sizeof(float)));
+
+    CHECK(cudaMemcpy(d_U0,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
+    CHECK(cudaMemcpy(d_U,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
+    /* cudaMalloc returns uninitialised memory, and the kernels accumulate into the dual
+     * variables P, Q and into V with +=, so those buffers have to start from zero */
+    float *zeroInit[] = {P1, P2, Q1, Q2, Q3, V1, V2};
+    for (int b = 0; b < 7; b++) { CHECK(cudaMemset(zeroInit[b], 0, dimTotal*sizeof(float))); }
+
+    if (dimZ == 1) {
+        /*2D case */
+        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+        for(int n=0; n < iterationsNumb; n++) {
+            /* Calculate Dual Variable P */
+            DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, dimX, dimY, sigma);
+            CHECK(cudaDeviceSynchronize());
+            /*Projection onto convex set for P*/
+            ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, alpha1);
+            CHECK(cudaDeviceSynchronize());
+            /* Calculate Dual Variable Q */
+            DualQ_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, Q1, Q2, Q3, dimX, dimY, sigma);
+            CHECK(cudaDeviceSynchronize());
+            /*Projection onto convex set for Q*/
+            ProjQ_2D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, dimX, dimY, alpha0);
+            CHECK(cudaDeviceSynchronize());
+            /*saving U into U_old*/
+            copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+            /*adjoint operation  -> divergence and projection of P*/
+            DivProjP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, dimX, dimY, lambda, tau);
+            CHECK(cudaDeviceSynchronize());
+            /*get updated solution U*/
+            newU_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+            /*saving V into V_old*/
+            copyIm_TGV_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+            /* upd V*/
+            UpdV_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, tau);
+            CHECK(cudaDeviceSynchronize());
+            /*get new V*/
+            newU_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+        }
+    }
+    else {
+        /*3D case */
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE)); /* z extent uses the z block size */
+        float *P3, *Q4, *Q5, *Q6, *V3, *V3_old;
+        CHECK(cudaMalloc((void**)&P3,dimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&Q4,dimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&Q5,dimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&Q6,dimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&V3,dimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&V3_old,dimTotal*sizeof(float)));
+        /* same zero start for the additional 3D dual and V buffers */
+        float *zeroInit3D[] = {P3, Q4, Q5, Q6, V3};
+        for (int b = 0; b < 5; b++) { CHECK(cudaMemset(zeroInit3D[b], 0, dimTotal*sizeof(float))); }
+        for(int n=0; n < iterationsNumb; n++) {
+            /* Calculate Dual Variable P */
+            DualP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, V3, P1, P2, P3, dimX, dimY, dimZ, sigma);
+            CHECK(cudaDeviceSynchronize());
+            /*Projection onto convex set for P*/
+            ProjP_3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, alpha1);
+            CHECK(cudaDeviceSynchronize());
+            /* Calculate Dual Variable Q */
+            DualQ_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, sigma);
+            CHECK(cudaDeviceSynchronize());
+            /*Projection onto convex set for Q*/
+            ProjQ_3D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, alpha0);
+            CHECK(cudaDeviceSynchronize());
+            /*saving U into U_old*/
+            copyIm_TGV_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+            /*adjoint operation  -> divergence and projection of P*/
+            DivProjP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, P3, dimX, dimY, dimZ, lambda, tau);
+            CHECK(cudaDeviceSynchronize());
+            /*get updated solution U*/
+            newU_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+            /*saving V into V_old*/
+            copyIm_TGV_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+            /* upd V*/
+            UpdV_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, tau);
+            CHECK(cudaDeviceSynchronize());
+            /*get new V*/
+            newU_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+        }
+        CHECK(cudaFree(Q4));
+        CHECK(cudaFree(Q5));
+        CHECK(cudaFree(Q6));
+        CHECK(cudaFree(P3));
+        CHECK(cudaFree(V3));
+        CHECK(cudaFree(V3_old));
+    }
+
+    CHECK(cudaMemcpy(U,d_U,dimTotal*sizeof(float),cudaMemcpyDeviceToHost));
+    CHECK(cudaFree(d_U0));
+    CHECK(cudaFree(d_U));
+    CHECK(cudaFree(U_old));
+    CHECK(cudaFree(P1));
+    CHECK(cudaFree(P2));
+    CHECK(cudaFree(Q1));
+    CHECK(cudaFree(Q2));
+    CHECK(cudaFree(Q3));
+    CHECK(cudaFree(V1));
+    CHECK(cudaFree(V2));
+    CHECK(cudaFree(V1_old));
+    CHECK(cudaFree(V2_old));
+    return 0;
+}
diff --git a/src/Core/regularisers_GPU/TGV_GPU_core.h b/src/Core/regularisers_GPU/TGV_GPU_core.h
new file mode 100644
index 0000000..9f73d1c
--- /dev/null
+++ b/src/Core/regularisers_GPU/TGV_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __TGV_GPU_H__
+#define __TGV_GPU_H__
+#include "CCPiDefines.h"
+#include <stdio.h>
+/* C-linkage declaration of the TGV GPU entry point: U0 is the input, U receives the result (dimX*dimY*dimZ floats each); dimZ == 1 selects the 2D path. */
+extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
+
+#endif /* __TGV_GPU_H__ */
diff --git a/src/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/src/Core/regularisers_GPU/TV_FGP_GPU_core.cu
new file mode 100755
index 0000000..b371c5d
--- /dev/null
+++ b/src/Core/regularisers_GPU/TV_FGP_GPU_core.cu
@@ -0,0 +1,564 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ 
+
+#include "TV_FGP_GPU_core.h"
+#include "shared.h"
+#include <thrust/device_vector.h>
+#include <thrust/transform_reduce.h>
+
+/* CUDA implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume 
+ * 2. lambdaPar - regularization parameter 
+ * 3. Number of iterations
+ * 4. epsilon: tolerance constant 
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: nonnegativity (0 is OFF by default) 
+ * 7. print information: 0 (off) or 1 (on) 
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+
+
+#define BLKXSIZE2D 16 /* presumably the 2D thread-block size (launch code is outside this view) */
+#define BLKYSIZE2D 16
+
+#define BLKXSIZE 8 /* presumably the 3D thread-block size (launch code is outside this view) */
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+/* idivup(a, b): integer division of a by b, rounded up when there is a remainder */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+struct square { __host__ __device__ float operator()(float x) { return x * x; } }; /* functor returning x*x, usable on host and device */
+
+/************************************************/
+/*****************2D modules*********************/
+/************************************************/
+/* Computes D = Ad - lambda*(R1 + R2 - left - upper), where left is the left neighbour
+ * of R1 and upper is the upper neighbour of R2 (a missing neighbour counts as zero).
+ * ImSize is not used by this kernel. */
+__global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda)
+{
+    //calculate each thread global index
+    const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
+    const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
+
+    if ((xIndex >= N) || (yIndex >= M)) return;
+
+    const int index = xIndex + N*yIndex;
+
+    // backward neighbours; zero on the first column / first row
+    const float left = (xIndex <= 0) ? 0.0f : R1[(xIndex-1) + N*yIndex];
+    const float upper = (yIndex <= 0) ? 0.0f : R2[xIndex + N*(yIndex-1)];
+
+    D[index] = Ad[index] - lambda*(R1[index] + R2[index] - left - upper);
+}
+
+/* Computes P = R + multip*diff for both components, where diff is D[index] - D[next]
+ * along x (P1) and y (P2); diff is zero on the last column (x) or last row (y).
+ * ImSize is not used by this kernel. */
+__global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, int N, int M, int ImSize, float multip)
+{
+    //calculate each thread global index
+    const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
+    const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
+
+    if ((xIndex >= N) || (yIndex >= M)) return;
+
+    const int index = xIndex + N*yIndex;
+
+    /* boundary conditions */
+    float diff_x = 0.0f;
+    float diff_y = 0.0f;
+    if (xIndex < N-1) diff_x = D[index] - D[(xIndex+1) + N*yIndex];
+    if (yIndex < M-1) diff_y = D[index] - D[(xIndex) + N*(yIndex + 1)];
+
+    //Write final result to global memory
+    P1[index] = R1[index] + multip*diff_x;
+    P2[index] = R2[index] + multip*diff_y;
+}
+
+/* Isotropic projection: where P1^2 + P2^2 exceeds 1, the pair (P1,P2) is divided by
+ * the square root of that sum. ImSize is not used by this kernel. */
+__global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+{
+    //calculate each thread global index
+    const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
+    const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
+
+    if ((xIndex >= N) || (yIndex >= M)) return;
+
+    const int index = xIndex + N*yIndex;
+    const float sq_norm = pow(P1[index],2) +  pow(P2[index],2);
+
+    // pairs whose squared norm is not above one are left unchanged
+    if (!(sq_norm > 1.0f)) return;
+
+    P1[index] = P1[index]/sqrt(sq_norm);
+    P2[index] = P2[index]/sqrt(sq_norm);
+}
+/* Anisotropic projection: P1 and P2 are each divided by their own absolute value,
+ * with the divisor raised to 1 when it is below 1. ImSize is not used by this kernel. */
+__global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+{
+    //calculate each thread global index
+    const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
+    const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
+
+    if ((xIndex >= N) || (yIndex >= M)) return;
+
+    const int index = xIndex + N*yIndex;
+    // divisors: the absolute values, raised to one where they fall below it
+    float div1 = abs(P1[index]);
+    float div2 = abs(P2[index]);
+    if (div1 < 1.0f) div1 = 1.0f;
+    if (div2 < 1.0f) div2 = 1.0f;
+
+    P1[index] = P1[index]/div1;
+    P2[index] = P2[index]/div2;
+}
+/* Sets R = P + multip2*(P - P_old) for both components. tkp1, tk and ImSize are not
+ * used by this kernel. */
+__global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize)
+{
+    //calculate each thread global index
+    const int xIndex=blockIdx.x*blockDim.x+threadIdx.x;
+    const int yIndex=blockIdx.y*blockDim.y+threadIdx.y;
+
+    if ((xIndex >= N) || (yIndex >= M)) return;
+
+    const int index = xIndex + N*yIndex;
+    R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]);
+    R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]);
+}
+/* Clamps negative entries of Output to zero, one thread per pixel of the N x M image. */
+__global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total)
+{
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    const int index = xIndex + N*yIndex;
+
+    /* Bound the coordinates like the other 2D kernels in this file, so that threads with
+     * xIndex >= N do not wrap into pixels owned by other threads. */
+    if ((xIndex < N) && (yIndex < M) && (index < num_total) && (Output[index] < 0.0f)) Output[index] = 0.0f;
+}
+/************************************************/
+/*****************3D modules*********************/
+/************************************************/
+__global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda)
+{
+    /* D = Ad - lambda*(backward differences of R1, R2, R3 along i, j, k); a missing predecessor counts as zero. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    /* threads outside the N x M x Z volume do nothing (ImSize is not read) */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+    
+    const int slice = N*M;                 /* elements per k-plane */
+    const int index = slice*k + i + N*j;
+    
+    /* predecessor values along each axis */
+    const float prev1 = (i <= 0) ? 0.0f : R1[slice*k + (i-1) + N*j];
+    const float prev2 = (j <= 0) ? 0.0f : R2[slice*k + i + N*(j-1)];
+    const float prev3 = (k <= 0) ? 0.0f : R3[slice*(k-1) + i + N*j];
+    
+    //Write final result to global memory
+    D[index] = Ad[index] - lambda*(R1[index] + R2[index] + R3[index] - prev1 - prev2 - prev3);
+}
+
+__global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float multip)
+{
+    /* P = R + multip*(D - next D along the axis); the difference is zero on the last index of each axis. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    if ((i < N) && (j < M) && (k < Z)) {
+        const int plane = N*M;
+        const int index = plane*k + i + N*j;
+        const float centre = D[index];
+        float val1 = 0.0f, val2 = 0.0f, val3 = 0.0f;
+        
+        /* boundary conditions: stay at zero when there is no successor along the axis */
+        if (i < N-1) val1 = centre - D[plane*k + (i+1) + N*j];
+        if (j < M-1) val2 = centre - D[plane*k + i + N*(j+1)];
+        if (k < Z-1) val3 = centre - D[plane*(k+1) + i + N*j];
+        
+        //Write final result to global memory
+        P1[index] = R1[index] + multip*val1;
+        P2[index] = R2[index] + multip*val2;
+        P3[index] = R3[index] + multip*val3;
+    }
+    return;
+}
+
+__global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+{
+    /* Isotropic projection: where P1^2 + P2^2 + P3^2 > 1 the triple is rescaled in place to unit length. */
+    float denom,sq_denom;
+    //calculate each thread global index
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = (N*M)*k + i + N*j;
+    
+    if ((i < N) && (j < M) && (k <  Z)) {
+        const float p1 = P1[index], p2 = P2[index], p3 = P3[index];
+        denom = p1*p1 + p2*p2 + p3*p3; /* plain multiplies instead of generic pow() calls */
+        if (denom > 1.0f) {
+            sq_denom = 1.0f/sqrtf(denom);
+            P1[index] = p1*sq_denom;
+            P2[index] = p2*sq_denom;
+            P3[index] = p3*sq_denom;
+        }
+    }
+    return;
+}
+
+__global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+{
+    /* Anisotropic projection: P1, P2 and P3 are each divided in place by max(|value|, 1). ImSize is not read. */
+    float val1, val2, val3;
+    //calculate each thread global index
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = (N*M)*k + i + N*j;
+    /* fabsf: single-precision absolute value that does not depend on which abs() overload is visible */
+    if ((i < N) && (j < M) && (k <  Z)) {
+        val1 = fabsf(P1[index]);
+        val2 = fabsf(P2[index]);
+        val3 = fabsf(P3[index]);
+        if (val1 < 1.0f) {val1 = 1.0f;}
+        if (val2 < 1.0f) {val2 = 1.0f;}
+        if (val3 < 1.0f) {val3 = 1.0f;}
+        P1[index] = P1[index]/val1;
+        P2[index] = P2[index]/val2;
+        P3[index] = P3[index]/val3;
+    }
+    return;
+}
+__global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize)
+{
+    /* Over-relaxation step: R = P + multip2*(P - P_old) for the three dual components. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+    /* tkp1, tk and ImSize are not read here; only multip2 drives the update */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+    const int pos = (N*M)*k + i + N*j;
+    const float p1 = P1[pos];
+    R1[pos] = p1 + multip2*(p1 - P1_old[pos]);
+    const float p2 = P2[pos];
+    R2[pos] = p2 + multip2*(p2 - P2_old[pos]);
+    const float p3 = P3[pos];
+    R3[pos] = p3 + multip2*(p3 - P3_old[pos]);
+}
+
+__global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_total)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    /* Clamps negative entries of the N x M x Z array Output to zero in place (num_total is not needed by the guard). */
+    int index = (N*M)*k + i + N*j;
+    /* Guard on the coordinates so an out-of-range thread cannot fold onto an element of another row or slice. */
+    if ((i < N) && (j < M) && (k < Z)) {
+        if (Output[index] < 0.0f) Output[index] = 0.0f;
+    }
+}
+__global__ void FGPcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
+{
+    /* Element-wise copy of the N x M array Input into Output (num_total is not needed by the guard). */
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    int index = xIndex + N*yIndex;
+    /* Guard on the coordinates so a thread with xIndex >= N cannot fold onto an element of another row. */
+    if ((xIndex < N) && (yIndex < M)) {
+        Output[index] = Input[index];
+    }
+}
+
+__global__ void FGPcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    /* Element-wise copy of the N x M x Z array Input into Output (num_total is not needed by the guard). */
+    int index = (N*M)*k + i + N*j;
+    /* Guard on the coordinates so an out-of-range thread cannot fold onto an element of another row or slice. */
+    if ((i < N) && (j < M) && (k < Z)) {
+        Output[index] = Input[index];
+    }
+}
+
+__global__ void FGPResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
+{
+    /* Output = Input1 - Input2, element by element over the N x M array (num_total is not needed by the guard). */
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    int index = xIndex + N*yIndex;
+    /* Guard on the coordinates so a thread with xIndex >= N cannot fold onto an element of another row. */
+    if ((xIndex < N) && (yIndex < M)) {
+        Output[index] = Input1[index] - Input2[index];
+    }
+}
+
+__global__ void FGPResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+    /* Output = Input1 - Input2, element by element over the N x M x Z array (num_total is not needed by the guard). */
+    int index = (N*M)*k + i + N*j;
+    /* Guard on the coordinates so an out-of-range thread cannot fold onto an element of another row or slice. */
+    if ((i < N) && (j < M) && (k < Z)) {
+        Output[index] = Input1[index] - Input2[index];
+    }
+}
+
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+
+////////////MAIN HOST FUNCTION ///////////////
+/* Host driver for FGP-TV denoising on the GPU (2D when dimZ <= 1, otherwise 3D).
+ *
+ * Input, Output - host arrays of dimX*dimY (2D) or dimX*dimY*dimZ (3D) floats
+ * lambdaPar     - regularisation parameter; the dual step is 1/(8*lambdaPar) in 2D, 1/(26*lambdaPar) in 3D
+ * iter          - maximum number of iterations
+ * epsil         - tolerance on the relative change of the update; 0 disables the test (only the 2D branch uses it)
+ * methodTV      - 0 selects the isotropic projection, any other value the anisotropic one
+ * nonneg        - non-zero clamps negative values of the update to zero in every iteration
+ * printM        - 1 prints the iteration index at which the loop ended
+ *
+ * Returns 0 on success and -1 when no usable CUDA device is reported.
+ */
+extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+{
+    int deviceCount = 0; // number of devices
+    cudaError_t devErr = cudaGetDeviceCount(&deviceCount);
+    if ((devErr != cudaSuccess) || (deviceCount <= 0)) { /* a failed query also means no usable device */
+        fprintf(stderr, "No CUDA devices found\n");
+        return -1;
+    }
+    
+    int count = 0, i;
+    float re, multip,multip2;
+    float tk = 1.0f;
+    float tkp1=1.0f;
+    
+    if (dimZ <= 1) {
+        /*2D version*/
+        int ImSize = dimX*dimY;
+        float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
+        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+        /*allocate space for images on device*/
+        checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
+        if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
+        
+        checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+        /* d_update_prev starts at zero so the first residual is not taken against uninitialised memory */
+        if (epsil != 0.0f) checkCudaErrors( cudaMemset(d_update_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P1_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R2, 0, ImSize*sizeof(float)) );
+        
+        /********************** Run CUDA 2D kernel here ********************/
+        multip = (1.0f/(8.0f*lambdaPar));
+        /* The main kernel */
+        for (i = 0; i < iter; i++) {
+            /* computing the gradient of the objective function */
+            Obj_func2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            
+            if (nonneg != 0) {
+            nonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() ); }
+            
+            /*Taking a step towards minus of the gradient*/
+            Grad_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, dimX, dimY, ImSize, multip);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            
+            /* projection step */
+            if (methodTV == 0) Proj_func2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/
+            else Proj_func2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            multip2 = ((tk-1.0f)/tkp1);
+            
+            Rupd_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            
+            if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library; P1_prev is reused as scratch for the residual */
+                FGPResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+                /* wrap the raw device pointers so Thrust reduces in place on the device, with no temporary copies */
+                thrust::device_ptr<float> d_resid = thrust::device_pointer_cast(P1_prev);
+                float reduction = sqrt(thrust::transform_reduce(d_resid, d_resid + ImSize, square(), 0.0f, thrust::plus<float>()));
+                thrust::device_ptr<float> d_upd = thrust::device_pointer_cast(d_update);
+                float reduction2 = sqrt(thrust::transform_reduce(d_upd, d_upd + ImSize, square(), 0.0f, thrust::plus<float>()));
+                re = (reduction/reduction2);
+                if (re < epsil)  count++;
+                if (count > 4) break;
+                FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+            }
+            
+            FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            
+            FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        checkCudaErrors( cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost) );
+        checkCudaErrors( cudaFree(d_input) );
+        checkCudaErrors( cudaFree(d_update) );
+        if (epsil != 0.0f) checkCudaErrors( cudaFree(d_update_prev) );
+        checkCudaErrors( cudaFree(P1) );
+        checkCudaErrors( cudaFree(P2) );
+        checkCudaErrors( cudaFree(P1_prev) );
+        checkCudaErrors( cudaFree(P2_prev) );
+        checkCudaErrors( cudaFree(R1) );
+        checkCudaErrors( cudaFree(R2) );
+    }
+    else {
+        /*3D version*/
+        int ImSize = dimX*dimY*dimZ;
+        float *d_input, *d_update=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL;
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
+        /*allocate space for images on device*/
+        checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) );
+        
+        checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemset(P1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P3, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P1_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P3_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R3, 0, ImSize*sizeof(float)) );
+        /********************** Run CUDA 3D kernel here ********************/
+        multip = (1.0f/(26.0f*lambdaPar));
+        /* The main kernel */
+        for (i = 0; i < iter; i++) {
+            /* computing the gradient of the objective function */
+            Obj_func3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            
+            if (nonneg != 0) {
+            nonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() ); }
+            
+            /*Taking a step towards minus of the gradient*/
+            Grad_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, multip);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            
+            /* projection step */
+            if (methodTV == 0) Proj_func3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */
+            else Proj_func3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            multip2 = ((tk-1.0f)/tkp1);
+            
+            Rupd_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            
+            FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            
+            FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            
+            FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        checkCudaErrors( cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost) );
+        checkCudaErrors( cudaFree(d_input) );
+        checkCudaErrors( cudaFree(d_update) );
+        checkCudaErrors( cudaFree(P1) );
+        checkCudaErrors( cudaFree(P2) );
+        checkCudaErrors( cudaFree(P3) );
+        checkCudaErrors( cudaFree(P1_prev) );
+        checkCudaErrors( cudaFree(P2_prev) );
+        checkCudaErrors( cudaFree(P3_prev) );
+        checkCudaErrors( cudaFree(R1) );
+        checkCudaErrors( cudaFree(R2) );
+        checkCudaErrors( cudaFree(R3) );
+    }
+    //cudaDeviceReset();
+    return 0;
+}
diff --git a/src/Core/regularisers_GPU/TV_FGP_GPU_core.h b/src/Core/regularisers_GPU/TV_FGP_GPU_core.h
new file mode 100755
index 0000000..bf13508
--- /dev/null
+++ b/src/Core/regularisers_GPU/TV_FGP_GPU_core.h
@@ -0,0 +1,9 @@
+#ifndef _TV_FGP_GPU_
+#define _TV_FGP_GPU_
+/* C-linkage entry point of the CUDA FGP-TV regulariser (2D when dimZ <= 1, otherwise 3D). */
+#include "CCPiDefines.h"
+#include <memory.h>
+/* Input and Output are host arrays; the function returns 0 on success and -1 when no CUDA device is found. */
+extern "C" CCPI_EXPORT int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+
+#endif 
diff --git a/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
new file mode 100755
index 0000000..76f5be9
--- /dev/null
+++ b/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
@@ -0,0 +1,358 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ 
+
+#include "TV_ROF_GPU_core.h"
+
+/* CUDA implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
+*
+* Input Parameters:
+* 1. Noisy image/volume [REQUIRED]
+* 2. lambda - regularization parameter [REQUIRED]
+* 3. tau - marching step for explicit scheme, ~0.1 is recommended [REQUIRED]
+* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
+*
+* Output:
+* [1] Regularized image/volume
+
+ * This function is based on the paper by
+* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+*
+* D. Kazantsev, 2016-18
+*/
+#include "shared.h"
+    
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+    
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+#define EPS 1.0e-12
+    
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+__host__ __device__ int sign (float x)
+{
+        return (x > 0) ? 1 : ((x < 0) ? -1 : 0); /* signum: 1 for positive, -1 for negative, 0 otherwise */
+}
+   
+/*********************2D case****************************/    
+    
+    /* differences 1 */
+    __global__ void D1_func2D(float* Input, float* D1, int N, int M)
+    {
+        /* D1 = dj / sqrt(dj^2 + minmod(i-differences)^2 + EPS), where dj is the forward difference along j. */
+        int i1, j1, i2;
+        float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1;
+        int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        int index = i + N*j;
+        
+        if ((i < N) && (j < M)) { /* i and j come from unsigned thread/block indices, so only upper bounds are tested */
+            /* boundary conditions (Neumann reflections) */
+            i1 = i + 1; if (i1 >= N) i1 = i-1;
+            i2 = i - 1; if (i2 < 0) i2 = i+1;
+            j1 = j + 1; if (j1 >= M) j1 = j-1;
+            
+            /* Forward-backward differences */
+            NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */
+            NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */
+            NOMy_0 = Input[index] - Input[j*N + i2]; /* y- */
+            
+            denom1 = NOMx_1*NOMx_1;
+            /* minmod of the two i-differences; fabsf keeps the absolute value in single precision */
+            denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabsf(NOMy_1), fabsf(NOMy_0)));
+            denom2 = denom2*denom2;
+            T1 = sqrt(denom1 + denom2 + EPS);
+            D1[index] = NOMx_1/T1;
+        }
+    }
+    
+    /* differences 2 */
+    __global__ void D2_func2D(float* Input, float* D2, int N, int M)
+    {
+        /* D2 = di / sqrt(di^2 + minmod(j-differences)^2 + EPS), where di is the forward difference along i. */
+        int i1, j1, j2;
+        float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2;
+        int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        int index = i + N*j;
+        
+        if ((i < N) && (j < M)) { /* i and j come from unsigned thread/block indices, so only upper bounds are tested */
+            /* boundary conditions (Neumann reflections) */
+            i1 = i + 1; if (i1 >= N) i1 = i-1;
+            j1 = j + 1; if (j1 >= M) j1 = j-1;
+            j2 = j - 1; if (j2 < 0) j2 = j+1;
+            
+            /* Forward-backward differences */
+            NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */
+            NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */
+            NOMx_0 = Input[index] - Input[j2*N + i]; /* x- */
+            
+            denom1 = NOMy_1*NOMy_1;
+            /* minmod of the two j-differences; fabsf keeps the absolute value in single precision */
+            denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabsf(NOMx_1), fabsf(NOMx_0)));
+            denom2 = denom2*denom2;
+            T2 = sqrt(denom1 + denom2 + EPS);
+            D2[index] = NOMy_1/T2;
+        }
+    }
+    
+    __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float lambda, float tau, int N, int M)
+    {
+        /* One explicit step in place: Update += tau*(2*lambda*(dv1 + dv2) - (Update - Input)), */
+        /* where dv1/dv2 are backward differences of D1 (along j) and D2 (along i). */
+        const int i = blockDim.x * blockIdx.x + threadIdx.x;
+        const int j = blockDim.y * blockIdx.y + threadIdx.y;
+        
+        /* threads that fall outside the N x M image have nothing to do */
+        if (!((i >= 0) && (i < N) && (j >= 0) && (j < M))) return;
+        
+        const int index = i + N*j;
+        
+        /* boundary conditions (Neumann reflections) */
+        const int i2 = (i - 1 < 0) ? (i + 1) : (i - 1);
+        const int j2 = (j - 1 < 0) ? (j + 1) : (j - 1);
+        
+        /* divergence components  */
+        const float dv1 = D1[index] - D1[j2*N + i];
+        const float dv2 = D2[index] - D2[j*N + i2];
+        
+        const float current = Update[index];
+        Update[index] = current + tau*(2.0f*lambda*(dv1 + dv2) - (current - Input[index]));
+    }
+/*********************3D case****************************/    
+ 
+    /* differences 1 */
+    __global__ void D1_func3D(float* Input, float* D1, int dimX, int dimY, int dimZ)
+    {
+        /* D1 = dj / sqrt(dj^2 + minmod(i-differences)^2 + minmod(k-differences)^2 + EPS), dj = forward j-difference */
+        float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
+        int i1,i2,k1,j1,k2; /* the backward j neighbour is not needed by this kernel */
+        
+        int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        int k = blockDim.z * blockIdx.z + threadIdx.z;
+        
+        int index = (dimX*dimY)*k + j*dimX+i;
+        
+        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+            
+            /* symmetric boundary conditions (Neuman) */
+            i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+            i2 = i - 1; if (i2 < 0) i2 = i+1;
+            j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+            k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+            k2 = k - 1; if (k2 < 0) k2 = k+1;
+            
+            /* Forward-backward differences */
+            NOMx_1 = Input[(dimX*dimY)*k + j1*dimX + i] - Input[index]; /* x+ */
+            NOMy_1 = Input[(dimX*dimY)*k + j*dimX + i1] - Input[index]; /* y+ */
+            NOMy_0 = Input[index] - Input[(dimX*dimY)*k + j*dimX + i2]; /* y- */
+            
+            NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+            NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
+            
+            /* the minmod terms stay in single precision (0.5f, fabsf), as in the 2D kernels */
+            denom1 = NOMx_1*NOMx_1;
+            denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabsf(NOMy_1),fabsf(NOMy_0)));
+            denom2 = denom2*denom2;
+            denom3 = 0.5f*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabsf(NOMz_1),fabsf(NOMz_0)));
+            denom3 = denom3*denom3;
+            T1 = sqrt(denom1 + denom2 + denom3 + EPS);
+            D1[index] = NOMx_1/T1;
+        }
+    }
+
+    /* differences 2 */
+    __global__ void D2_func3D(float* Input, float* D2, int dimX, int dimY, int dimZ)
+    {
+        /* D2 = di / sqrt(di^2 + minmod(j-differences)^2 + minmod(k-differences)^2 + EPS), di = forward i-difference */
+        float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
+        int i1,k1,j1,j2,k2; /* the backward i neighbour is not needed by this kernel */
+        
+        int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        int k = blockDim.z * blockIdx.z + threadIdx.z;
+        
+        int index = (dimX*dimY)*k + j*dimX+i;
+        
+        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+            
+            /* symmetric boundary conditions (Neuman) */
+            i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+            j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+            j2 = j - 1; if (j2 < 0) j2 = j+1;
+            k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+            k2 = k - 1; if (k2 < 0) k2 = k+1;
+            
+            /* Forward-backward differences */
+            NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
+            NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
+            NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+            NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+            NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
+            
+            /* the minmod terms stay in single precision (0.5f, fabsf), as in the 2D kernels */
+            denom1 = NOMy_1*NOMy_1;
+            denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabsf(NOMx_1),fabsf(NOMx_0)));
+            denom2 = denom2*denom2;
+            denom3 = 0.5f*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabsf(NOMz_1),fabsf(NOMz_0)));
+            denom3 = denom3*denom3;
+            T2 = sqrt(denom1 + denom2 + denom3 + EPS);
+            D2[index] = NOMy_1/T2;
+        }
+    }
+	
+	  /* differences 3 */
+    __global__ void D3_func3D(float* Input, float* D3, int dimX, int dimY, int dimZ)
+    {
+        /* D3 = dk / sqrt(dk^2 + minmod(j-differences)^2 + minmod(i-differences)^2 + EPS), dk = forward k-difference */
+        float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
+        int i1,i2,k1,j1,j2; /* the backward k neighbour is not needed by this kernel */
+        
+        int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        int k = blockDim.z * blockIdx.z + threadIdx.z;
+        
+        int index = (dimX*dimY)*k + j*dimX+i;
+        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+            /* reflect neighbour indices that would leave the volume */
+            i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+            i2 = i - 1; if (i2 < 0) i2 = i+1;
+            j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+            j2 = j - 1; if (j2 < 0) j2 = j+1;
+            k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+            
+            /* Forward-backward differences */
+            NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
+            NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
+            NOMy_0 = Input[index] - Input[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */
+            NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+            NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+            
+            /* the minmod terms stay in single precision (0.5f, fabsf), as in the 2D kernels */
+            denom1 = NOMz_1*NOMz_1;
+            denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabsf(NOMx_1),fabsf(NOMx_0)));
+            denom2 = denom2*denom2;
+            denom3 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabsf(NOMy_1),fabsf(NOMy_0)));
+            denom3 = denom3*denom3;
+            T3 = sqrt(denom1 + denom2 + denom3 + EPS);
+            D3[index] = NOMz_1/T3;
+        }
+    }
+
+    __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float lambda, float tau, int dimX, int dimY, int dimZ)
+    {
+        /* One explicit step in place: Update += tau*(2*lambda*(dv1 + dv2 + dv3) - (Update - Input)), */
+        /* where dv1/dv2/dv3 are backward differences of D1 (along j), D2 (along i) and D3 (along k). */
+        float dv1, dv2, dv3;
+        int i2,j2,k2;
+        int i = blockDim.x * blockIdx.x + threadIdx.x;
+        int j = blockDim.y * blockIdx.y + threadIdx.y;
+        int k = blockDim.z * blockIdx.z + threadIdx.z;
+        
+        int index = (dimX*dimY)*k + j*dimX+i;
+        
+        if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+            
+            /* symmetric boundary conditions (Neuman) */
+            /* only the backward neighbours are read below, so the forward ones are not computed */
+            i2 = i - 1; if (i2 < 0) i2 = i+1;
+            j2 = j - 1; if (j2 < 0) j2 = j+1;
+            k2 = k - 1; if (k2 < 0) k2 = k+1;
+            
+            /*divergence components */
+            dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i];
+            dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
+            dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
+            
+            Update[index] += tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index]));
+            
+        }
+    }
+
+/////////////////////////////////////////////////
+// HOST FUNCTION
+/* Host driver for explicit ROF-TV denoising on the GPU: Input/Output are host arrays of N*M*Z floats
+ * (Z == 0 is treated as one slice, Z > 1 selects the 3D kernels); lambdaPar is the regularisation weight,
+ * tau the explicit step and iter the iteration count. Returns 0; CUDA errors go through the CHECK macro. */
+extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z)
+{
+        // set up device
+        int dev = 0;
+        CHECK(cudaSetDevice(dev));
+        float *d_input, *d_update, *d_D1, *d_D2;
+        if (Z == 0) Z = 1;
+        const size_t volBytes = (size_t)N*M*Z*sizeof(float); /* size of one volume in bytes, computed once */
+        CHECK(cudaMalloc((void**)&d_input,volBytes));
+        CHECK(cudaMalloc((void**)&d_update,volBytes));
+        CHECK(cudaMalloc((void**)&d_D1,volBytes));
+        CHECK(cudaMalloc((void**)&d_D2,volBytes));
+        CHECK(cudaMemcpy(d_input,Input,volBytes,cudaMemcpyHostToDevice));
+        CHECK(cudaMemcpy(d_update,Input,volBytes,cudaMemcpyHostToDevice));
+        
+        if (Z > 1) {
+            // TV - 3D case
+            dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+            dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKZSIZE)); /* Z is tiled by the Z block size */
+            float *d_D3;
+            CHECK(cudaMalloc((void**)&d_D3,volBytes));
+            for(int n=0; n < iter; n++) {
+                /* calculate differences; launches on the same stream run in order, so only launch errors are polled here */
+                D1_func3D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M, Z);
+                CHECK(cudaPeekAtLastError());
+                D2_func3D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M, Z);
+                CHECK(cudaPeekAtLastError());
+                D3_func3D<<<dimGrid,dimBlock>>>(d_update, d_D3, N, M, Z);
+                CHECK(cudaPeekAtLastError());
+                /*running main kernel*/
+                TV_kernel3D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar, tau, N, M, Z);
+                CHECK(cudaPeekAtLastError());
+            }
+            CHECK(cudaDeviceSynchronize()); /* wait for the queued iterations and surface any execution error */
+            CHECK(cudaFree(d_D3));
+        }
+        else {
+            // TV - 2D case
+            dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+            dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
+            for(int n=0; n < iter; n++) {
+                /* calculate differences; launches on the same stream run in order, so only launch errors are polled here */
+                D1_func2D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M);
+                CHECK(cudaPeekAtLastError());
+                D2_func2D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M);
+                CHECK(cudaPeekAtLastError());
+                /*running main kernel*/
+                TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambdaPar, tau, N, M);
+                CHECK(cudaPeekAtLastError());
+            }
+            CHECK(cudaDeviceSynchronize()); /* wait for the queued iterations and surface any execution error */
+        }
+        CHECK(cudaMemcpy(Output,d_update,volBytes,cudaMemcpyDeviceToHost));
+        CHECK(cudaFree(d_input));
+        CHECK(cudaFree(d_update));
+        CHECK(cudaFree(d_D1));
+        CHECK(cudaFree(d_D2));
+        //cudaDeviceReset();
+        return 0;
+}
diff --git a/src/Core/regularisers_GPU/TV_ROF_GPU_core.h b/src/Core/regularisers_GPU/TV_ROF_GPU_core.h
new file mode 100755
index 0000000..3a09296
--- /dev/null
+++ b/src/Core/regularisers_GPU/TV_ROF_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __TVGPU_H__
+#define __TVGPU_H__
+#include "CCPiDefines.h"
+#include <stdio.h>
+
+extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
+
+#endif 
diff --git a/src/Core/regularisers_GPU/TV_SB_GPU_core.cu b/src/Core/regularisers_GPU/TV_SB_GPU_core.cu
new file mode 100755
index 0000000..1f494ee
--- /dev/null
+++ b/src/Core/regularisers_GPU/TV_SB_GPU_core.cu
@@ -0,0 +1,552 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ 
+
+#include "TV_SB_GPU_core.h"
+#include "shared.h"
+#include <thrust/device_vector.h>
+#include <thrust/transform_reduce.h>
+
+/* CUDA implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. epsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+* 6. nonneg: nonnegativity (0 is OFF by default) [OPTIONAL parameter]
+* 7. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+
+// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
+
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) /* integer division rounded up; used to size the launch grids */
+struct square { __host__ __device__ float operator()(float x) { return x * x; } }; /* unary functor x -> x*x, passed to thrust::transform_reduce */
+
+/************************************************/
+/*****************2D modules*********************/
+/************************************************/
+__global__ void gauss_seidel2D_kernel(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, float lambda, float mu, float normConst, int N, int M, int ImSize)
+{
+    /* One relaxation sweep for the image update of Split Bregman TV on an N x M grid.
+     * Every thread rewrites U at its own pixel from the previous iterate U_prev, the
+     * input image A and the auxiliary fields Dx,Dy / Bx,By. ImSize is not used.
+     * NOTE(review): the mirrored neighbour indices assume N >= 2 and M >= 2. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return; /* thread lies in the grid padding */
+
+    const int centre = row*N + col;
+    /* neighbours, reflected back into the image at the borders */
+    const int colNext = (col + 1 == N) ? col - 1 : col + 1;
+    const int colPrev = (col - 1 < 0) ? col + 1 : col - 1;
+    const int rowNext = (row + 1 == M) ? row - 1 : row + 1;
+    const int rowPrev = (row - 1 < 0) ? row + 1 : row - 1;
+
+    /* backward differences of D minus those of B, then the four neighbours of U_prev */
+    float acc = Dx[row*N+colPrev] - Dx[centre] + Dy[rowPrev*N+col] - Dy[centre] - Bx[row*N+colPrev] + Bx[centre] - By[rowPrev*N+col] + By[centre];
+    acc += U_prev[row*N+colNext] + U_prev[row*N+colPrev] + U_prev[rowNext*N+col] + U_prev[rowPrev*N+col];
+    acc *= lambda;
+    acc += mu*A[centre];
+    U[centre] = normConst*acc; /* write the final result to global memory */
+    return;
+}
+__global__ void updDxDy_shrinkAniso2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, float lambda, int N, int M, int ImSize)
+{
+    /* Anisotropic (l1) shrinkage for Split Bregman TV on an N x M grid: Dx and Dy are each set
+     * to the soft-thresholded value of (forward difference of U + Bregman variable), using the
+     * threshold 1/lambda. ImSize is not used. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return; /* thread lies in the grid padding */
+
+    const float thresh = 1.0f/lambda;
+    const int centre = row*N + col;
+    /* forward neighbours; on the last column/row the previous one is used instead */
+    const int colNext = (col + 1 == N) ? col - 1 : col + 1;
+    const int rowNext = (row + 1 == M) ? row - 1 : row + 1;
+
+    const float gx = (U[row*N+colNext] - U[centre]) + Bx[centre];
+    const float gy = (U[rowNext*N+col] - U[centre]) + By[centre];
+
+    /* magnitudes after thresholding, clamped at zero */
+    float magX = abs(gx) - thresh;
+    if (magX < 0) magX = 0;
+    float magY = abs(gy) - thresh;
+    if (magY < 0) magY = 0;
+    /* put the sign back: g/|g| is +1 or -1 for a non-zero g */
+    Dx[centre] = (gx != 0) ? (gx/abs(gx))*magX : 0.0f;
+    Dy[centre] = (gy != 0) ? (gy/abs(gy))*magY : 0.0f;
+}
+
+__global__ void updDxDy_shrinkIso2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, float lambda, int N, int M, int ImSize)
+{
+    /* Isotropic shrinkage for Split Bregman TV on an N x M grid: the vector
+     * (gx,gy) = forward difference of U + Bregman variables is shortened by 1/lambda
+     * along its own direction (down to zero length at most) and stored in (Dx,Dy).
+     * ImSize is not used. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return; /* thread lies in the grid padding */
+
+    const float thresh = 1.0f/lambda;
+    const int centre = row*N + col;
+    /* forward neighbours; on the last column/row the previous one is used instead */
+    const int colNext = (col + 1 == N) ? col - 1 : col + 1;
+    const int rowNext = (row + 1 == M) ? row - 1 : row + 1;
+
+    const float gx = (U[row*N+colNext] - U[centre]) + Bx[centre];
+    const float gy = (U[rowNext*N+col] - U[centre]) + By[centre];
+
+    const float norm = sqrt(gx*gx + gy*gy);
+
+    float shrunk = norm - thresh;
+    if (shrunk < 0) shrunk = 0.0f;
+
+    /* a zero-length vector has no direction: store zeros instead of dividing by zero */
+    if (norm == 0.0f) {
+        Dx[centre] = 0;
+        Dy[centre] = 0;
+        return;
+    }
+    Dx[centre] = shrunk*(gx/norm);
+    Dy[centre] = shrunk*(gy/norm);
+    return;
+}
+
+__global__ void updBxBy2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, int N, int M, int ImSize)
+{
+    /* Bregman variable update on an N x M grid: adds (forward difference of U) - D to
+     * Bx and By at every pixel. ImSize is not used. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return; /* thread lies in the grid padding */
+
+    const int centre = row*N + col;
+    /* symmetric (Neumann) boundary conditions: step backwards on the last column/row */
+    const int colNext = (col + 1 == N) ? col - 1 : col + 1;
+    const int rowNext = (row + 1 == M) ? row - 1 : row + 1;
+
+    const float dUx = U[row*N+colNext] - U[centre];
+    const float dUy = U[rowNext*N+col] - U[centre];
+    Bx[centre] += dUx - Dx[centre];
+    By[centre] += dUy - Dy[centre];
+    return;
+}
+
+
+/************************************************/
+/*****************3D modules*********************/
+/************************************************/
+__global__ void gauss_seidel3D_kernel(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, float mu, float normConst, int N, int M, int Z, int ImSize)
+{
+    /* One relaxation sweep for the volume update of Split Bregman TV on an N x M x Z grid
+     * (x fastest, then y, then z). Every thread rewrites U at its own voxel from the previous
+     * iterate U_prev, the input volume A and the auxiliary fields D* / B*. ImSize is not used.
+     * NOTE(review): the mirrored neighbour indices assume every dimension is >= 2. */
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int slc = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((col >= N) || (row >= M) || (slc >= Z)) return; /* thread lies in the grid padding */
+
+    const int plane = N*M;
+    const int centre = plane*slc + col + N*row;
+    /* neighbours, reflected back into the volume at the borders */
+    const int colNext = (col + 1 == N) ? col - 1 : col + 1;
+    const int colPrev = (col - 1 < 0) ? col + 1 : col - 1;
+    const int rowNext = (row + 1 == M) ? row - 1 : row + 1;
+    const int rowPrev = (row - 1 < 0) ? row + 1 : row - 1;
+    const int slcNext = (slc + 1 == Z) ? slc - 1 : slc + 1;
+    const int slcPrev = (slc - 1 < 0) ? slc + 1 : slc - 1;
+
+    const float dTerm = Dx[plane*slc + row*N+colPrev] - Dx[centre] + Dy[plane*slc + rowPrev*N+col] - Dy[centre] + Dz[plane*slcPrev + row*N+col] - Dz[centre];
+    const float bTerm = -Bx[plane*slc + row*N+colPrev] + Bx[centre] - By[plane*slc + rowPrev*N+col] + By[centre] - Bz[plane*slcPrev + row*N+col] + Bz[centre];
+    float acc = dTerm + bTerm;
+    acc += U_prev[plane*slc + row*N+colNext] + U_prev[plane*slc + row*N+colPrev] + U_prev[plane*slc + rowNext*N+col] + U_prev[plane*slc + rowPrev*N+col] + U_prev[plane*slcNext + row*N+col] + U_prev[plane*slcPrev + row*N+col];
+    acc *= lambda;
+    acc += mu*A[centre];
+    U[centre] = normConst*acc;
+    return;
+}
+__global__ void updDxDy_shrinkAniso3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, int N, int M, int Z, int ImSize)
+{
+    /* Anisotropic (l1) shrinkage for Split Bregman TV on an N x M x Z grid: Dx, Dy and Dz are
+     * each set to the soft-thresholded value of (forward difference of U + Bregman variable),
+     * using the threshold 1/lambda. ImSize is not used. */
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int slc = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((col >= N) || (row >= M) || (slc >= Z)) return; /* thread lies in the grid padding */
+
+    const float thresh = 1.0f/lambda;
+    const int plane = N*M;
+    const int centre = plane*slc + col + N*row;
+    /* forward neighbours; on the last column/row/slice the previous one is used instead */
+    const int colNext = (col + 1 == N) ? col - 1 : col + 1;
+    const int rowNext = (row + 1 == M) ? row - 1 : row + 1;
+    const int slcNext = (slc + 1 == Z) ? slc - 1 : slc + 1;
+
+    const float gx = (U[plane*slc + colNext + N*row] - U[centre]) + Bx[centre];
+    const float gy = (U[plane*slc + col + N*rowNext] - U[centre]) + By[centre];
+    const float gz = (U[plane*slcNext + col + N*row] - U[centre]) + Bz[centre];
+
+    /* magnitudes after thresholding, clamped at zero */
+    float magX = abs(gx) - thresh; magX = (magX < 0.0f) ? 0.0f : magX;
+    float magY = abs(gy) - thresh; magY = (magY < 0.0f) ? 0.0f : magY;
+    float magZ = abs(gz) - thresh; magZ = (magZ < 0.0f) ? 0.0f : magZ;
+    /* put the sign back: g/|g| is +1 or -1 for a non-zero g */
+    Dx[centre] = (gx != 0.0f) ? (gx/abs(gx))*magX : 0.0f;
+    Dy[centre] = (gy != 0.0f) ? (gy/abs(gy))*magY : 0.0f;
+    Dz[centre] = (gz != 0.0f) ? (gz/abs(gz))*magZ : 0.0f;
+    return;
+}
+
+__global__ void updDxDy_shrinkIso3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, int N, int M, int Z, int ImSize)
+{
+    /* Isotropic shrinkage for Split Bregman TV on an N x M x Z grid: the vector
+     * (gx,gy,gz) = forward difference of U + Bregman variables is shortened by 1/lambda
+     * along its own direction (down to zero length at most) and stored in (Dx,Dy,Dz).
+     * ImSize is not used. */
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int slc = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((col >= N) || (row >= M) || (slc >= Z)) return; /* thread lies in the grid padding */
+
+    const float thresh = 1.0f/lambda;
+    const int plane = N*M;
+    const int centre = plane*slc + col + N*row;
+    /* forward neighbours; on the last column/row/slice the previous one is used instead */
+    const int colNext = (col + 1 == N) ? col - 1 : col + 1;
+    const int rowNext = (row + 1 == M) ? row - 1 : row + 1;
+    const int slcNext = (slc + 1 == Z) ? slc - 1 : slc + 1;
+
+    const float gx = (U[plane*slc + colNext + N*row] - U[centre]) + Bx[centre];
+    const float gy = (U[plane*slc + col + N*rowNext] - U[centre]) + By[centre];
+    const float gz = (U[plane*slcNext + col + N*row] - U[centre]) + Bz[centre];
+
+    const float norm = sqrt(gx*gx + gy*gy + gz*gz);
+
+    float shrunk = norm - thresh;
+    if (shrunk < 0.0f) shrunk = 0.0f;
+
+    /* a zero-length vector has no direction: store zeros instead of dividing by zero */
+    if (norm == 0.0f) {
+        Dx[centre] = 0.0f;
+        Dy[centre] = 0.0f;
+        Dz[centre] = 0.0f;
+        return;
+    }
+    Dx[centre] = shrunk*(gx/norm);
+    Dy[centre] = shrunk*(gy/norm);
+    Dz[centre] = shrunk*(gz/norm);
+}
+
+__global__ void updBxBy3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int N, int M, int Z, int ImSize)
+{
+    /* Bregman variable update on an N x M x Z grid: adds (forward difference of U) - D to
+     * Bx, By and Bz at every voxel. ImSize is not used. */
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int slc = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((col >= N) || (row >= M) || (slc >= Z)) return; /* thread lies in the grid padding */
+
+    const int plane = N*M;
+    const int centre = plane*slc + col + N*row;
+    /* symmetric (Neumann) boundary conditions: step backwards on the last column/row/slice */
+    const int colNext = (col + 1 == N) ? col - 1 : col + 1;
+    const int rowNext = (row + 1 == M) ? row - 1 : row + 1;
+    const int slcNext = (slc + 1 == Z) ? slc - 1 : slc + 1;
+
+    const float uHere = U[centre];
+    Bx[centre] += (U[plane*slc + colNext + N*row] - uHere) - Dx[centre];
+    By[centre] += (U[plane*slc + col + N*rowNext] - uHere) - Dy[centre];
+    Bz[centre] += (U[plane*slcNext + col + N*row] - uHere) - Dz[centre];
+    return;
+}
+
+__global__ void SBcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
+{
+    /* Element-wise copy Input -> Output over an N x M grid, one thread per pixel. */
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    /* Test the coordinates too: a padding thread with xIndex >= N maps to a linear index
+     * that belongs to the next row, so checking only index < num_total repeats that copy. */
+    if ((xIndex >= N) || (yIndex >= M)) return;
+    const int index = xIndex + N*yIndex;
+    if (index < num_total) Output[index] = Input[index];
+}
+
+__global__ void SBcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
+{
+    /* Element-wise copy Input -> Output over an N x M x Z grid, one thread per voxel. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+    /* Test the coordinates too: a padding thread with i >= N or j >= M maps to a linear index
+     * owned by another row/slice, so checking only index < num_total repeats that copy. */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+    const int index = (N*M)*k + i + N*j;
+    if (index < num_total) Output[index] = Input[index];
+}
+
+__global__ void SBResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
+{
+    /* Element-wise difference Output = Input1 - Input2 over an N x M grid. */
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    /* Test the coordinates too: a padding thread with xIndex >= N maps to a linear index
+     * that belongs to the next row, so checking only index < num_total repeats that write. */
+    if ((xIndex >= N) || (yIndex >= M)) return;
+    const int index = xIndex + N*yIndex;
+    if (index < num_total) Output[index] = Input1[index] - Input2[index];
+}
+
+__global__ void SBResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
+{
+    /* Element-wise difference Output = Input1 - Input2 over an N x M x Z grid. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+    /* Test the coordinates too: a padding thread with i >= N or j >= M maps to a linear index
+     * owned by another row/slice, so checking only index < num_total repeats that write. */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+    const int index = (N*M)*k + i + N*j;
+    if (index < num_total) Output[index] = Input1[index] - Input2[index];
+}
+
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/********************* MAIN HOST FUNCTION ******************/
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
+{
+    /* Host entry point for Split Bregman TV denoising: dimZ <= 1 selects the 2D path on a dimX x dimY image, otherwise the 3D path on a dimX x dimY x dimZ volume.
+     * Input and Output are host arrays holding one float per pixel/voxel. iter bounds the outer iterations; a non-zero epsil enables early stopping on the relative
+     * change of the iterate; methodTV == 1 selects anisotropic shrinkage, any other value isotropic; printM == 1 prints the stopping iteration. Returns 0, or -1 without a CUDA device. */
+    int deviceCount = 0; // number of devices
+    /* a failed device query is treated like "no device" instead of being ignored */
+    if ((cudaGetDeviceCount(&deviceCount) != cudaSuccess) || (deviceCount <= 0)) {
+        fprintf(stderr, "No CUDA devices found\n");
+        return -1;
+    }
+    int ll, DimTotal, count = 0; /* count: iterations so far whose relative change was below epsil */
+    float re, lambda, normConst;
+    mu = 1.0f/mu;
+    lambda = 2.0f*mu;
+    if (dimZ <= 1) {
+        /* 2D version */
+        DimTotal = dimX*dimY;
+        normConst = 1.0f/(mu + 4.0f*lambda);
+        float *d_input=NULL, *d_update=NULL, *d_res=NULL, *d_update_prev=NULL, *Dx=NULL, *Dy=NULL, *Bx=NULL, *By=NULL;
+
+        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+
+        /* allocate space for images on device (d_res only when the stopping test is enabled) */
+        checkCudaErrors( cudaMalloc((void**)&d_input,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update_prev,DimTotal*sizeof(float)) );
+        if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_res,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Dx,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Dy,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Bx,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&By,DimTotal*sizeof(float)) );
+
+        checkCudaErrors( cudaMemcpy(d_input,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemcpy(d_update,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemset(Dx, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(Dy, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(Bx, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(By, 0, DimTotal*sizeof(float)) );
+
+        /********************** Run CUDA 2D kernels here ********************/
+        /* The main loop */
+        for (ll = 0; ll < iter; ll++) {
+
+            /* storing old value */
+            SBcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* perform two GS iterations (normally 2 is enough for the convergence) */
+            gauss_seidel2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Bx, By, lambda, mu, normConst, dimX, dimY, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            SBcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            /* 2nd GS iteration */
+            gauss_seidel2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Bx, By, lambda, mu, normConst, dimX, dimY, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* TV-related step */
+            if (methodTV == 1)  updDxDy_shrinkAniso2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, lambda, dimX, dimY, DimTotal);
+            else updDxDy_shrinkIso2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, lambda, dimX, dimY, DimTotal);
+
+            /* update for Bregman variables */
+            updBxBy2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, dimX, dimY, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library */
+                SBResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, d_res, dimX, dimY, DimTotal);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+                /* tag the raw device pointers so Thrust reduces them on the GPU in place; a bare float* is taken for a host range and would be copied first */
+                thrust::device_ptr<float> res_ptr = thrust::device_pointer_cast(d_res);
+                float reduction = sqrt(thrust::transform_reduce(res_ptr, res_ptr + DimTotal, square(), 0.0f, thrust::plus<float>()));
+                thrust::device_ptr<float> upd_ptr = thrust::device_pointer_cast(d_update);
+                float reduction2 = sqrt(thrust::transform_reduce(upd_ptr, upd_ptr + DimTotal, square(), 0.0f, thrust::plus<float>()));
+
+                re = (reduction/reduction2);
+                if (re < epsil)  count++;
+                if (count > 4) break;
+            }
+
+        }
+        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        checkCudaErrors( cudaMemcpy(Output,d_update,DimTotal*sizeof(float),cudaMemcpyDeviceToHost) );
+
+        checkCudaErrors( cudaFree(d_input) );
+        checkCudaErrors( cudaFree(d_update) );
+        checkCudaErrors( cudaFree(d_update_prev) );
+        if (epsil != 0.0f) checkCudaErrors( cudaFree(d_res) );
+        checkCudaErrors( cudaFree(Dx) );
+        checkCudaErrors( cudaFree(Dy) );
+        checkCudaErrors( cudaFree(Bx) );
+        checkCudaErrors( cudaFree(By) );
+    }
+    else {
+        /* 3D version */
+        DimTotal = dimX*dimY*dimZ;
+        normConst = 1.0f/(mu + 6.0f*lambda);
+        float *d_input=NULL, *d_update=NULL, *d_res=NULL, *d_update_prev=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL;
+
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
+
+        /* allocate space for volumes on device (d_res only when the stopping test is enabled) */
+        checkCudaErrors( cudaMalloc((void**)&d_input,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update_prev,DimTotal*sizeof(float)) );
+        if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_res,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Dx,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Dy,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Dz,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Bx,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&By,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Bz,DimTotal*sizeof(float)) );
+
+        checkCudaErrors( cudaMemcpy(d_input,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemcpy(d_update,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemset(Dx, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(Dy, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(Dz, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(Bx, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(By, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(Bz, 0, DimTotal*sizeof(float)) );
+
+        /********************** Run CUDA 3D kernels here ********************/
+        /* The main loop */
+        for (ll = 0; ll < iter; ll++) {
+
+            /* storing old value */
+            SBcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* perform two GS iterations (normally 2 is enough for the convergence) */
+            gauss_seidel3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Dz, Bx, By, Bz, lambda, mu, normConst, dimX, dimY, dimZ, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            SBcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            /* 2nd GS iteration */
+            gauss_seidel3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Dz, Bx, By, Bz, lambda, mu, normConst, dimX, dimY, dimZ, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* TV-related step */
+            if (methodTV == 1)  updDxDy_shrinkAniso3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, lambda, dimX, dimY, dimZ, DimTotal);
+            else updDxDy_shrinkIso3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, lambda, dimX, dimY, dimZ, DimTotal);
+
+            /* update for Bregman variables */
+            updBxBy3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library */
+                SBResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, d_res, dimX, dimY, dimZ, DimTotal);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+                /* tag the raw device pointers so Thrust reduces them on the GPU in place; a bare float* is taken for a host range and would be copied first */
+                thrust::device_ptr<float> res_ptr = thrust::device_pointer_cast(d_res);
+                float reduction = sqrt(thrust::transform_reduce(res_ptr, res_ptr + DimTotal, square(), 0.0f, thrust::plus<float>()));
+                thrust::device_ptr<float> upd_ptr = thrust::device_pointer_cast(d_update);
+                float reduction2 = sqrt(thrust::transform_reduce(upd_ptr, upd_ptr + DimTotal, square(), 0.0f, thrust::plus<float>()));
+
+                re = (reduction/reduction2);
+                if (re < epsil)  count++;
+                if (count > 4) break;
+            }
+        }
+        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        checkCudaErrors( cudaMemcpy(Output,d_update,DimTotal*sizeof(float),cudaMemcpyDeviceToHost) );
+
+        checkCudaErrors( cudaFree(d_input) );
+        checkCudaErrors( cudaFree(d_update) );
+        checkCudaErrors( cudaFree(d_update_prev) );
+        if (epsil != 0.0f) checkCudaErrors( cudaFree(d_res) );
+        checkCudaErrors( cudaFree(Dx) );
+        checkCudaErrors( cudaFree(Dy) );
+        checkCudaErrors( cudaFree(Dz) );
+        checkCudaErrors( cudaFree(Bx) );
+        checkCudaErrors( cudaFree(By) );
+        checkCudaErrors( cudaFree(Bz) );
+    }
+    //cudaDeviceReset();
+    return 0;
+}
diff --git a/src/Core/regularisers_GPU/TV_SB_GPU_core.h b/src/Core/regularisers_GPU/TV_SB_GPU_core.h
new file mode 100755
index 0000000..901b90f
--- /dev/null
+++ b/src/Core/regularisers_GPU/TV_SB_GPU_core.h
@@ -0,0 +1,10 @@
+#ifndef _SB_TV_GPU_
+#define _SB_TV_GPU_
+
+#include "CCPiDefines.h"
+#include <memory.h>
+
+
+extern "C" CCPI_EXPORT int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
+
+#endif 
diff --git a/src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
new file mode 100644
index 0000000..7503ec7
--- /dev/null
+++ b/src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
@@ -0,0 +1,741 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ 
+#include "shared.h"
+#include "dTV_FGP_GPU_core.h"
+#include <thrust/device_vector.h>
+#include <thrust/transform_reduce.h>
+
+/* CUDA implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is being fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: nonnegativity (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab's codes and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+ 
+
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) /* integer division rounded up */
+struct square { __host__ __device__ float operator()(float x) { return x * x; } }; /* unary functor returning x*x, callable on host and device */
+
+/************************************************/
+/*****************2D modules*********************/
+/************************************************/
+
+__global__ void GradNorm_func2D_kernel(float *Refd, float *Refd_x, float *Refd_y, float eta, int N, int M, int ImSize)
+{
+    /* Writes the forward-difference gradient of the reference image Refd, divided by
+     * sqrt(|gradient|^2 + eta^2), into Refd_x / Refd_y on an N x M grid.
+     * ImSize is not used. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return; /* thread lies in the grid padding */
+
+    const int centre = col + N*row;
+    const float here = Refd[centre];
+    /* boundary conditions: the neighbour beyond the last column/row is taken as zero */
+    const float right = (col >= N-1) ? 0.0f : Refd[(col+1) + N*row];
+    const float below = (row >= M-1) ? 0.0f : Refd[col + N*(row + 1)];
+
+    const float gradX = right - here;
+    const float gradY = below - here;
+    float magn = pow(gradX,2) + pow(gradY,2);
+    magn = sqrt(magn + pow(eta,2));
+    Refd_x[centre] = gradX/magn;
+    Refd_y[centre] = gradY/magn;
+    return;
+}
+
+__global__ void ProjectVect_func2D_kernel(float *R1, float *R2, float *Refd_x, float *Refd_y, int N, int M, int ImSize)
+{
+    /* Per pixel of an N x M grid, subtracts from the vector (R1,R2) its inner product with
+     * (Refd_x,Refd_y) times (Refd_x,Refd_y); R1 and R2 are updated in place. ImSize is not used. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return; /* thread lies in the grid padding */
+
+    const int centre = col + N*row;
+    /* the inner product is taken from the values of R1, R2 before either is overwritten */
+    const float dot = R1[centre]*Refd_x[centre] + R2[centre]*Refd_y[centre];
+
+    R1[centre] = R1[centre] - dot*Refd_x[centre];
+    R2[centre] = R2[centre] - dot*Refd_y[centre];
+    return;
+}
+
+
+__global__ void Obj_dfunc2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda)
+{
+    /* Per pixel of an N x M grid:
+     *   D = Ad - lambda*( (R1 - R1[left neighbour]) + (R2 - R2[upper neighbour]) ),
+     * where a neighbour outside the image contributes zero.
+     * ImSize is not used. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return; /* thread lies in the grid padding */
+
+    const int centre = col + N*row;
+    /* backward neighbours; zero on the first column/row */
+    const float left = (col <= 0) ? 0.0f : R1[(col-1) + N*row];
+    const float above = (row <= 0) ? 0.0f : R2[col + N*(row-1)];
+
+    /* write the final result to global memory */
+    D[centre] = Ad[centre] - lambda*(R1[centre] + R2[centre] - left - above);
+    return;
+}
+
+__global__ void Grad_dfunc2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2,  float *Refd_x, float *Refd_y, int N, int M, int ImSize, float multip)
+{
+    /* Per pixel of an N x M grid: takes the differences D[here] - D[next] in x and y
+     * (zero on the last column/row), subtracts from that vector its inner product with
+     * (Refd_x,Refd_y) times (Refd_x,Refd_y), and stores P = R + multip*(result).
+     * ImSize is not used. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return; /* thread lies in the grid padding */
+
+    const int centre = col + N*row;
+    /* boundary conditions: no difference is taken across the last column/row */
+    float dx = (col >= N-1) ? 0.0f : D[centre] - D[(col+1) + N*row];
+    float dy = (row >= M-1) ? 0.0f : D[centre] - D[col + N*(row + 1)];
+
+    /* inner product with the reference direction, then removal of that component */
+    const float dot = dx*Refd_x[centre] + dy*Refd_y[centre];
+    dx = dx - dot*Refd_x[centre];
+    dy = dy - dot*Refd_y[centre];
+
+    /* write the final result to global memory */
+    P1[centre] = R1[centre] + multip*dx;
+    P2[centre] = R2[centre] + multip*dy;
+
+    return;
+}
+
+__global__ void Proj_dfunc2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+{
+    /* Per pixel of an N x M grid: when the vector (P1,P2) is longer than 1 it is divided
+     * by its length; otherwise it is left untouched. ImSize is not used. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return; /* thread lies in the grid padding */
+
+    const int centre = col + N*row;
+    const float sq = pow(P1[centre],2) +  pow(P2[centre],2);
+
+    /* squared length not above 1: nothing to do */
+    if (!(sq > 1.0f)) return;
+
+    P1[centre] = P1[centre]/sqrt(sq);
+    P2[centre] = P2[centre]/sqrt(sq);
+    return;
+}
+__global__ void Proj_dfunc2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+{
+    /* Per pixel of an N x M grid: P1 and P2 are each divided by max(|value|, 1), which
+     * limits every component to the range [-1, 1]. ImSize is not used. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return; /* thread lies in the grid padding */
+
+    const int centre = col + N*row;
+    /* divisors: the magnitude of the component, but never less than 1 */
+    float scaleX = abs(P1[centre]);
+    float scaleY = abs(P2[centre]);
+    scaleX = (scaleX < 1.0f) ? 1.0f : scaleX;
+    scaleY = (scaleY < 1.0f) ? 1.0f : scaleY;
+
+    P1[centre] = P1[centre]/scaleX;
+    P2[centre] = P2[centre]/scaleY;
+    return;
+}
+__global__ void Rupd_dfunc2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize)
+{
+    /* Per pixel of an N x M grid: R = P + multip2*(P - P_old) for both components.
+     * tkp1, tk and ImSize are accepted but not used inside the kernel. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return; /* thread lies in the grid padding */
+    const int centre = col + N*row;
+    /* step from the old value through the new one, scaled by multip2 */
+    R1[centre] = P1[centre] + multip2*(P1[centre] - P1_old[centre]);
+    R2[centre] = P2[centre] + multip2*(P2[centre] - P2_old[centre]);
+    return;
+}
+/* Non-negativity constraint (2D): negative entries of Output are set to zero
+ * in place.
+ * N, M      - image extents along x and y
+ * num_total - number of elements in Output
+ * The coordinate test keeps threads of the grid padding (xIndex >= N or
+ * yIndex >= M) from computing a linear index that aliases a pixel owned by
+ * another thread, matching the guard used by the other 2D kernels. */
+__global__ void dTVnonneg2D_kernel(float* Output, int N, int M, int num_total)
+{
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+    const int index = xIndex + N*yIndex;
+
+    if ((xIndex < N) && (yIndex < M) && (index < num_total)) {
+        if (Output[index] < 0.0f) Output[index] = 0.0f;
+    }
+}
+/* Element-wise copy (2D): Output[index] = Input[index].
+ * N, M      - image extents along x and y
+ * num_total - number of elements in the arrays
+ * The coordinate test keeps threads of the grid padding (xIndex >= N or
+ * yIndex >= M) from computing a linear index that aliases a pixel owned by
+ * another thread, matching the guard used by the other 2D kernels. */
+__global__ void dTVcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
+{
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+    const int index = xIndex + N*yIndex;
+
+    if ((xIndex < N) && (yIndex < M) && (index < num_total)) {
+        Output[index] = Input[index];
+    }
+}
+
+/* Element-wise copy (3D): Output[index] = Input[index].
+ * N, M, Z   - volume extents along x, y and z
+ * num_total - number of elements in the arrays
+ * The coordinate test keeps threads of the grid padding (i >= N, j >= M or
+ * k >= Z) from computing a linear index that aliases a voxel owned by
+ * another thread, matching the guard used by the other 3D kernels. */
+__global__ void dTVcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    const int index = (N*M)*k + i + N*j;
+
+    if ((i < N) && (j < M) && (k < Z) && (index < num_total)) {
+        Output[index] = Input[index];
+    }
+}
+
+/* Element-wise difference (2D): Output[index] = Input1[index] - Input2[index].
+ * N, M      - image extents along x and y
+ * num_total - number of elements in the arrays
+ * The coordinate test keeps threads of the grid padding (xIndex >= N or
+ * yIndex >= M) from computing a linear index that aliases a pixel owned by
+ * another thread, matching the guard used by the other 2D kernels. */
+__global__ void dTVResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
+{
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+    const int index = xIndex + N*yIndex;
+
+    if ((xIndex < N) && (yIndex < M) && (index < num_total)) {
+        Output[index] = Input1[index] - Input2[index];
+    }
+}
+
+/* Element-wise difference (3D): Output[index] = Input1[index] - Input2[index].
+ * N, M, Z   - volume extents along x, y and z
+ * num_total - number of elements in the arrays
+ * The coordinate test keeps threads of the grid padding (i >= N, j >= M or
+ * k >= Z) from computing a linear index that aliases a voxel owned by
+ * another thread, matching the guard used by the other 3D kernels. */
+__global__ void dTVResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    const int index = (N*M)*k + i + N*j;
+
+    if ((i < N) && (j < M) && (k < Z) && (index < num_total)) {
+        Output[index] = Input1[index] - Input2[index];
+    }
+}
+
+/************************************************/
+/*****************3D modules*********************/
+/************************************************/
+/* Normalised forward-difference gradient of the reference volume (3D).
+ * For each voxel the differences towards the next voxel in x, y and z are
+ * taken (the neighbour value is replaced by 0 on the last plane of each
+ * axis) and divided by sqrt(gradX^2 + gradY^2 + gradZ^2 + eta^2). The three
+ * components are written to Refd_x, Refd_y and Refd_z. ImSize is not used. */
+__global__ void GradNorm_func3D_kernel(float *Refd, float *Refd_x, float *Refd_y, float *Refd_z, float eta, int N, int M, int Z, int ImSize)
+{
+    // voxel coordinates handled by this thread
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    // threads that fall outside the N x M x Z volume have nothing to do
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int slice = N*M;
+    const int idx = slice*k + i + N*j;
+
+    /* boundary conditions: the neighbour is taken as 0 past the last plane */
+    const float nextX = (i >= N-1) ? 0.0f : Refd[idx + 1];
+    const float nextY = (j >= M-1) ? 0.0f : Refd[idx + N];
+    const float nextZ = (k >= Z-1) ? 0.0f : Refd[idx + slice];
+
+    const float gradX = nextX - Refd[idx];
+    const float gradY = nextY - Refd[idx];
+    const float gradZ = nextZ - Refd[idx];
+
+    float magn = pow(gradX,2) + pow(gradY,2) + pow(gradZ,2);
+    magn = sqrt(magn + pow(eta,2));
+
+    Refd_x[idx] = gradX/magn;
+    Refd_y[idx] = gradY/magn;
+    Refd_z[idx] = gradZ/magn;
+}
+
+/* Removes from the vector field (R1, R2, R3), in place, its component along
+ * (Refd_x, Refd_y, Refd_z): per voxel, the inner product of the two vectors
+ * is computed and that multiple of Refd is subtracted from R.
+ * ImSize is not used. */
+__global__ void ProjectVect_func3D_kernel(float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize)
+{
+    // voxel coordinates handled by this thread
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    // threads that fall outside the N x M x Z volume have nothing to do
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int idx = (N*M)*k + i + N*j;
+
+    /* inner product of R and Refd at this voxel */
+    const float dot = R1[idx]*Refd_x[idx] + R2[idx]*Refd_y[idx] + R3[idx]*Refd_z[idx];
+
+    R1[idx] -= dot*Refd_x[idx];
+    R2[idx] -= dot*Refd_y[idx];
+    R3[idx] -= dot*Refd_z[idx];
+}
+
+
+/* Per voxel: D = Ad - lambda*(R1 + R2 + R3 - R1[x-1] - R2[y-1] - R3[z-1]),
+ * where a backward neighbour that would lie before the first plane of its
+ * axis is replaced by 0. ImSize is not used. */
+__global__ void Obj_dfunc3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda)
+{
+    // voxel coordinates handled by this thread
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    // threads that fall outside the N x M x Z volume have nothing to do
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int slice = N*M;
+    const int idx = slice*k + i + N*j;
+
+    /* backward neighbours, zero on the first plane of each axis */
+    const float prevX = (i <= 0) ? 0.0f : R1[idx - 1];
+    const float prevY = (j <= 0) ? 0.0f : R2[idx - N];
+    const float prevZ = (k <= 0) ? 0.0f : R3[idx - slice];
+
+    //Write final result to global memory
+    D[idx] = Ad[idx] - lambda*(R1[idx] + R2[idx] + R3[idx] - prevX - prevY - prevZ);
+}
+
+/* Per voxel: takes the differences D[idx] - D[next] along x, y and z (zero
+ * on the last plane of each axis), removes from that difference vector its
+ * component along (Refd_x, Refd_y, Refd_z), and writes
+ * P = R + multip*difference for the three components. ImSize is not used. */
+__global__ void Grad_dfunc3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize, float multip)
+{
+    // voxel coordinates handled by this thread
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    // threads that fall outside the N x M x Z volume have nothing to do
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int slice = N*M;
+    const int idx = slice*k + i + N*j;
+
+    /* boundary conditions: zero difference on the last plane of each axis */
+    float diffX = (i >= N-1) ? 0.0f : D[idx] - D[idx + 1];
+    float diffY = (j >= M-1) ? 0.0f : D[idx] - D[idx + N];
+    float diffZ = (k >= Z-1) ? 0.0f : D[idx] - D[idx + slice];
+
+    /* inner product of the difference vector and Refd */
+    const float dot = diffX*Refd_x[idx] + diffY*Refd_y[idx] + diffZ*Refd_z[idx];
+    diffX -= dot*Refd_x[idx];
+    diffY -= dot*Refd_y[idx];
+    diffZ -= dot*Refd_z[idx];
+
+    //Write final result to global memory
+    P1[idx] = R1[idx] + multip*diffX;
+    P2[idx] = R2[idx] + multip*diffY;
+    P3[idx] = R3[idx] + multip*diffZ;
+}
+
+/* Isotropic projection step (3D).
+ * For every voxel where P1^2 + P2^2 + P3^2 is greater than 1, the three
+ * components are multiplied in place by 1/sqrt of that sum; other voxels
+ * are left untouched. ImSize is not used. */
+__global__ void Proj_dfunc3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+{
+    // voxel coordinates handled by this thread
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    // threads that fall outside the N x M x Z volume have nothing to do
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int idx = (N*M)*k + i + N*j;
+    const float sq_sum = pow(P1[idx],2) +  pow(P2[idx],2) + pow(P3[idx],2);
+
+    if (sq_sum > 1.0f) {
+        const float inv_norm = 1.0f/sqrt(sq_sum);
+        P1[idx] *= inv_norm;
+        P2[idx] *= inv_norm;
+        P3[idx] *= inv_norm;
+    }
+}
+
+/* Anisotropic projection step (3D).
+ * Each component is divided in place by max(|component|, 1), handled
+ * independently for P1, P2 and P3. ImSize is not used. */
+__global__ void Proj_dfunc3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+{
+    // voxel coordinates handled by this thread
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    // threads that fall outside the N x M x Z volume have nothing to do
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int idx = (N*M)*k + i + N*j;
+
+    // divisors: absolute values, raised to 1 when they are smaller than 1
+    float div1 = abs(P1[idx]);
+    float div2 = abs(P2[idx]);
+    float div3 = abs(P3[idx]);
+    div1 = (div1 < 1.0f) ? 1.0f : div1;
+    div2 = (div2 < 1.0f) ? 1.0f : div2;
+    div3 = (div3 < 1.0f) ? 1.0f : div3;
+
+    P1[idx] /= div1;
+    P2[idx] /= div2;
+    P3[idx] /= div3;
+}
+
+
+/* Update of R (3D): R = P + multip2*(P - P_old), applied per voxel to the
+ * three components. tkp1, tk and ImSize are accepted but not used by the
+ * kernel; the caller passes the already combined factor in multip2. */
+__global__ void Rupd_dfunc3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize)
+{
+    // voxel coordinates handled by this thread
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    // threads that fall outside the N x M x Z volume have nothing to do
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int idx = (N*M)*k + i + N*j;
+
+    R1[idx] = P1[idx] + multip2*(P1[idx] - P1_old[idx]);
+    R2[idx] = P2[idx] + multip2*(P2[idx] - P2_old[idx]);
+    R3[idx] = P3[idx] + multip2*(P3[idx] - P3_old[idx]);
+}
+
+/* Non-negativity constraint (3D): negative entries of Output are set to zero
+ * in place.
+ * N, M, Z   - volume extents along x, y and z
+ * num_total - number of elements in Output
+ * The coordinate test keeps threads of the grid padding (i >= N, j >= M or
+ * k >= Z) from computing a linear index that aliases a voxel owned by
+ * another thread, matching the guard used by the other 3D kernels. */
+__global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_total)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    const int index = (N*M)*k + i + N*j;
+
+    if ((i < N) && (j < M) && (k < Z) && (index < num_total)) {
+        if (Output[index] < 0.0f) Output[index] = 0.0f;
+    }
+}
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+
+////////////MAIN HOST FUNCTION ///////////////
+/*
+ * Host driver of the GPU FGP-dTV regulariser.
+ *
+ * Copies Input and InputRef to the device, runs up to `iter` iterations of
+ * the 2D (dimZ <= 1) or 3D kernel sequence and copies the result to Output.
+ *
+ * Input, InputRef - host arrays of dimX*dimY (2D) or dimX*dimY*dimZ (3D) floats
+ * Output          - host array of the same size receiving the result
+ * lambdaPar       - passed to the Obj_dfunc kernels; the step factor is
+ *                   1/(8*lambdaPar) in 2D and 1/(26*lambdaPar) in 3D
+ * iter            - maximum number of iterations
+ * epsil           - tolerance of the stopping rule; 0 disables the rule
+ * eta             - passed to the GradNorm kernel
+ * methodTV        - 0 selects the isotropic projection, anything else the
+ *                   anisotropic one
+ * nonneg          - non-zero clamps negative values of the update to zero
+ * printM          - 1 prints the iteration index at which the loop stopped
+ *
+ * Returns 0 on success and -1 when no CUDA device is available or a checked
+ * CUDA call fails (checkCudaErrors prints the reason and returns -1).
+ *
+ * NOTE(review): an early return from checkCudaErrors inside this function
+ * does not release the device buffers allocated so far.
+ */
+extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+{
+    int deviceCount = -1; // number of devices
+    /* a failed query is treated the same way as "no device" */
+    if ((cudaGetDeviceCount(&deviceCount) != cudaSuccess) || (deviceCount <= 0)) {
+        fprintf(stderr, "No CUDA devices found\n");
+        return -1;
+    }
+
+    int count = 0, i;
+    float re, multip, multip2;
+    float tk = 1.0f;
+    float tkp1 = 1.0f;
+
+    if (dimZ <= 1) {
+        /*2D version*/
+        int ImSize = dimX*dimY;
+        float *d_input=NULL, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *d_InputRef=NULL;
+
+        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+
+        /*allocate space for images on device*/
+        checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
+        if (epsil != 0.0f) {
+            checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
+            /* the residual kernel reads this buffer on the very first
+               iteration, so it must not be left uninitialised */
+            checkCudaErrors( cudaMemset(d_update_prev, 0, ImSize*sizeof(float)) );
+        }
+        checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_InputRef,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_x,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_y,ImSize*sizeof(float)) );
+
+        checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemcpy(d_InputRef,InputRef,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+
+        checkCudaErrors( cudaMemset(P1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P1_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(InputRef_x, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(InputRef_y, 0, ImSize*sizeof(float)) );
+
+        /******************** Run CUDA 2D kernel here ********************/
+        multip = (1.0f/(8.0f*lambdaPar));
+        /* calculate gradient vectors for the reference */
+        GradNorm_func2D_kernel<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, eta, dimX, dimY, ImSize);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() );
+
+        /* The main kernel */
+        for (i = 0; i < iter; i++) {
+
+            /*projects a 2D vector field R-1,2 onto the orthogonal complement of another 2D vector field InputRef_xy*/
+            ProjectVect_func2D_kernel<<<dimGrid,dimBlock>>>(R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* computing the gradient of the objective function */
+            Obj_dfunc2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (nonneg != 0) {
+                dTVnonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+            }
+
+            /*Taking a step towards minus of the gradient*/
+            Grad_dfunc2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize, multip);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* projection step */
+            if (methodTV == 0) Proj_dfunc2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/
+            else Proj_dfunc2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            multip2 = ((tk-1.0f)/tkp1);
+
+            Rupd_dfunc2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library.
+                   P1_prev is no longer needed in this iteration (Rupd has
+                   run) and is reused as scratch space for the residual; it
+                   is overwritten with P1 further below. */
+                dTVResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+
+                /* NOTE(review): the device_vector constructors below are given
+                   raw device pointers - confirm this copy path is the intended
+                   one (wrapping the pointers in thrust::device_ptr would avoid
+                   the extra copies). */
+                thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize);
+                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
+                thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
+                float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
+
+                /* relative change; stop once it was below epsil more than 4 times */
+                re = (reduction/reduction2);
+                if (re < epsil) count++;
+                if (count > 4) break;
+
+                dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+            }
+
+            dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", i);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        const cudaError_t copyStatus = cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost);
+
+        cudaFree(d_input);
+        cudaFree(d_update);
+        if (epsil != 0.0f) cudaFree(d_update_prev);
+        cudaFree(P1);
+        cudaFree(P2);
+        cudaFree(P1_prev);
+        cudaFree(P2_prev);
+        cudaFree(R1);
+        cudaFree(R2);
+
+        cudaFree(d_InputRef);
+        cudaFree(InputRef_x);
+        cudaFree(InputRef_y);
+
+        /* report a failed copy-back only after the buffers are released */
+        checkCudaErrors( copyStatus );
+    }
+    else {
+        /*3D version*/
+        int ImSize = dimX*dimY*dimZ;
+        float *d_input=NULL, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *InputRef_z=NULL, *d_InputRef=NULL;
+
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
+
+        /*allocate space for images on device*/
+        checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
+        if (epsil != 0.0f) {
+            checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
+            /* the residual kernel reads this buffer on the very first
+               iteration, so it must not be left uninitialised */
+            checkCudaErrors( cudaMemset(d_update_prev, 0, ImSize*sizeof(float)) );
+        }
+        checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_InputRef,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_x,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_y,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_z,ImSize*sizeof(float)) );
+
+        checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemcpy(d_InputRef,InputRef,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+
+        checkCudaErrors( cudaMemset(P1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P3, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P1_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P3_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R3, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(InputRef_x, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(InputRef_y, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(InputRef_z, 0, ImSize*sizeof(float)) );
+
+        /********************** Run CUDA 3D kernel here ********************/
+        multip = (1.0f/(26.0f*lambdaPar));
+        /* calculate gradient vectors for the reference */
+        GradNorm_func3D_kernel<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, InputRef_z, eta, dimX, dimY, dimZ, ImSize);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() );
+
+        /* The main kernel */
+        for (i = 0; i < iter; i++) {
+
+            /*projects a 3D vector field R-1,2,3 onto the orthogonal complement of another 3D vector field InputRef_xyz*/
+            ProjectVect_func3D_kernel<<<dimGrid,dimBlock>>>(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* computing the gradient of the objective function */
+            Obj_dfunc3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (nonneg != 0) {
+                dTVnonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+            }
+
+            /*Taking a step towards minus of the gradient*/
+            Grad_dfunc3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize, multip);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* projection step */
+            if (methodTV == 0) Proj_dfunc3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */
+            else Proj_dfunc3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            multip2 = ((tk-1.0f)/tkp1);
+
+            Rupd_dfunc3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library.
+                   P1_prev is no longer needed in this iteration (Rupd has
+                   run) and is reused as scratch space for the residual; it
+                   is overwritten with P1 further below. */
+                dTVResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, dimZ, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+
+                /* NOTE(review): the device_vector constructors below are given
+                   raw device pointers - confirm this copy path is the intended
+                   one (wrapping the pointers in thrust::device_ptr would avoid
+                   the extra copies). */
+                thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize);
+                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
+                thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
+                float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
+
+                /* relative change; stop once it was below epsil more than 4 times */
+                re = (reduction/reduction2);
+                if (re < epsil) count++;
+                if (count > 4) break;
+
+                dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+            }
+
+            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", i);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        const cudaError_t copyStatus = cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost);
+
+        cudaFree(d_input);
+        cudaFree(d_update);
+        if (epsil != 0.0f) cudaFree(d_update_prev);
+        cudaFree(P1);
+        cudaFree(P2);
+        cudaFree(P3);
+        cudaFree(P1_prev);
+        cudaFree(P2_prev);
+        cudaFree(P3_prev);
+        cudaFree(R1);
+        cudaFree(R2);
+        cudaFree(R3);
+        cudaFree(InputRef_x);
+        cudaFree(InputRef_y);
+        cudaFree(InputRef_z);
+        cudaFree(d_InputRef);
+
+        /* report a failed copy-back only after the buffers are released */
+        checkCudaErrors( copyStatus );
+    }
+    //cudaDeviceReset();
+    return 0;
+}
diff --git a/src/Core/regularisers_GPU/dTV_FGP_GPU_core.h b/src/Core/regularisers_GPU/dTV_FGP_GPU_core.h
new file mode 100644
index 0000000..f9281e8
--- /dev/null
+++ b/src/Core/regularisers_GPU/dTV_FGP_GPU_core.h
@@ -0,0 +1,9 @@
+#ifndef _dTV_FGP_GPU_
+#define _dTV_FGP_GPU_
+
+#include "CCPiDefines.h"
+#include <memory.h>
+
+/* GPU FGP-dTV regulariser, host entry point (C linkage).
+ * Input/InputRef/Output are host arrays of dimX*dimY*dimZ floats; the 2D
+ * code path is taken when dimZ <= 1. Returns 0 on success and -1 when no
+ * CUDA device is found or a checked CUDA call fails. */
+extern "C" CCPI_EXPORT int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+
+#endif 
diff --git a/src/Core/regularisers_GPU/shared.h b/src/Core/regularisers_GPU/shared.h
new file mode 100644
index 0000000..fe98cd6
--- /dev/null
+++ b/src/Core/regularisers_GPU/shared.h
@@ -0,0 +1,42 @@
+/*shared macros*/
+
+
+/*checks CUDA call, should be used in functions returning <int> value
+if error happens, writes to standard error and explicitly returns -1*/
+#define CHECK(call)                                                            \
+{                                                                              \
+    const cudaError_t error = call;                                            \
+    if (error != cudaSuccess)                                                  \
+    {                                                                          \
+        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
+        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
+                cudaGetErrorString(error));                                    \
+        return -1;                                                             \
+    }                                                                          \
+}
+
+// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
+#define checkCudaErrors(call)                                                            \
+{                                                                              \
+    const cudaError_t error = call;                                            \
+    if (error != cudaSuccess)                                                  \
+    {                                                                          \
+        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
+        fprintf(stderr, "code: %d, reason: %s\n", error,                       \
+                cudaGetErrorString(error));                                    \
+        return -1;                                                                \
+    }                                                                          \
+}
+/*#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+    if (cudaSuccess != err)
+    {
+        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+                file, line, (int)err, cudaGetErrorString(err));
+        return;
+    }
+}
+*/
+
diff --git a/src/Matlab/CMakeLists.txt b/src/Matlab/CMakeLists.txt
new file mode 100755
index 0000000..b97f845
--- /dev/null
+++ b/src/Matlab/CMakeLists.txt
@@ -0,0 +1,147 @@
+# Build script for the Matlab mex wrappers of the regularisers.
+project(regulariserMatlab)
+
+
+# Matlab is mandatory here (REQUIRED): configuration stops if it, or one of
+# the listed components, cannot be found.
+find_package(Matlab REQUIRED COMPONENTS MAIN_PROGRAM MX_LIBRARY ENG_LIBRARY )
+
+
+
+#C:\Users\ofn77899\Documents\Projects\CCPi\GitHub\CCPi-FISTA_Reconstruction\Core\regularisers_CPU
+# matlab_add_mex(
+    # NAME CPU_ROF
+    # SRC 
+      # ${CMAKE_SOURCE_DIR}/Matlab/mex_compile/regularisers_CPU/ROF_TV.c
+    # LINK_TO cilreg ${Matlab_LIBRARIES}
+    # )
+    
+# target_include_directories(CPU_ROF 
+   # PUBLIC ${CMAKE_SOURCE_DIR}/Core/regularisers_CPU
+   # ${CMAKE_SOURCE_DIR}/Core/regularisers_GPU
+   # ${CMAKE_SOURCE_DIR}/Core/inpainters_CPU
+   # ${CMAKE_SOURCE_DIR}/Core/
+   # ${MATLAB_INCLUDE_DIR})
+   
+   # matlab_add_mex(
+    # NAME CPU_TNV
+    # SRC 
+      # ${CMAKE_SOURCE_DIR}/Matlab/mex_compile/regularisers_CPU/TNV.c 
+    # LINK_TO cilreg ${Matlab_LIBRARIES}
+    # )
+    
+# target_include_directories(CPU_TNV 
+   # PUBLIC ${CMAKE_SOURCE_DIR}/Core/regularisers_CPU
+   # ${CMAKE_SOURCE_DIR}/Core/regularisers_GPU
+   # ${CMAKE_SOURCE_DIR}/Core/inpainters_CPU
+   # ${CMAKE_SOURCE_DIR}/Core/
+   # ${MATLAB_INCLUDE_DIR})
+   
+#set (CPU_MEX_FILES "regularisers_CPU/TNV.c;regularisers_CPU/ROF_TV.c")
+#set (MEX_TARGETS "CPU_TNV;CPU_ROF")
+#list(APPEND MEX_TARGETS "CPU_TNV")
+#list(APPEND MEX_TARGETS "CPU_ROF")
+
+# Every C file in mex_compile/regularisers_CPU is a mex gateway and becomes
+# its own mex target, named after the file without its extension. The
+# targets link against cilreg.
+file(GLOB CPU_MEX_FILES
+    "${CMAKE_SOURCE_DIR}/Matlab/mex_compile/regularisers_CPU/*.c"
+)
+
+list(LENGTH CPU_MEX_FILES num_cpu_mex_files)
+message("found ${num_cpu_mex_files} CPU mex files")
+
+# Iterating over the list itself (instead of an index range) also copes with
+# an empty glob result.
+foreach(current_file_name IN LISTS CPU_MEX_FILES)
+  get_filename_component(current_file "${current_file_name}" NAME)
+  # target name = file name with the last extension removed
+  string(REGEX REPLACE "\\.[^.]*$" "" current_target "${current_file}")
+  message("matlab_add_mex target " ${current_file} " and " ${current_target})
+  matlab_add_mex(
+    NAME ${current_target}
+    SRC
+      ${current_file_name}
+    LINK_TO cilreg ${Matlab_LIBRARIES}
+    )
+
+  target_include_directories(${current_target}
+    PUBLIC ${CMAKE_SOURCE_DIR}/Core/regularisers_CPU
+    ${CMAKE_SOURCE_DIR}/Core/regularisers_GPU
+    ${CMAKE_SOURCE_DIR}/Core/inpainters_CPU
+    ${CMAKE_SOURCE_DIR}/Core/
+    ${MATLAB_INCLUDE_DIR})
+  set_property(TARGET ${current_target} PROPERTY C_STANDARD 99)
+  list(APPEND CPU_MEX_TARGETS ${current_target})
+  INSTALL(TARGETS ${current_target} DESTINATION "${MATLAB_DEST}")
+endforeach()
+
+# Convenience target that builds all CPU mex wrappers.
+add_custom_target(MatlabWrapper DEPENDS ${CPU_MEX_TARGETS})
+
+# GPU mex wrappers: only built when BUILD_CUDA is set and CUDA is found.
+# Every C++ file in mex_compile/regularisers_GPU becomes its own mex target,
+# named after the file without its extension, linked against cilregcuda.
+if (BUILD_CUDA)
+    find_package(CUDA)
+    if (CUDA_FOUND)
+      file(GLOB GPU_MEX_FILES
+        "${CMAKE_SOURCE_DIR}/Matlab/mex_compile/regularisers_GPU/*.cpp"
+      )
+
+      list(LENGTH GPU_MEX_FILES num_gpu_mex_files)
+      message("number of GPU files  " ${num_gpu_mex_files})
+
+      # Iterating over the list itself (instead of an index range) also copes
+      # with an empty glob result.
+      foreach(current_file_name IN LISTS GPU_MEX_FILES)
+        get_filename_component(current_file "${current_file_name}" NAME)
+        # target name = file name with the last extension removed
+        string(REGEX REPLACE "\\.[^.]*$" "" current_target "${current_file}")
+        message("matlab_add_mex target " ${current_file} " and " ${current_target})
+        matlab_add_mex(
+          NAME ${current_target}
+          SRC
+            ${current_file_name}
+          LINK_TO cilregcuda ${Matlab_LIBRARIES}
+          )
+
+        target_include_directories(${current_target}
+        PUBLIC ${CMAKE_SOURCE_DIR}/Core/regularisers_CPU
+               ${CMAKE_SOURCE_DIR}/Core/regularisers_GPU
+               ${CMAKE_SOURCE_DIR}/Core/inpainters_CPU
+               ${CMAKE_SOURCE_DIR}/Core/
+               ${MATLAB_INCLUDE_DIR})
+
+        list(APPEND GPU_MEX_TARGETS ${current_target})
+        INSTALL(TARGETS ${current_target} DESTINATION "${MATLAB_DEST}")
+      endforeach()
+
+      # Convenience target that builds all GPU mex wrappers.
+      add_custom_target(MatlabWrapperGPU DEPENDS ${GPU_MEX_TARGETS})
+
+    endif()
+endif()
diff --git a/src/Matlab/mex_compile/compileCPU_mex_Linux.m b/src/Matlab/mex_compile/compileCPU_mex_Linux.m
new file mode 100644
index 0000000..72a828e
--- /dev/null
+++ b/src/Matlab/mex_compile/compileCPU_mex_Linux.m
@@ -0,0 +1,81 @@
+% Build the CPU regularisers and inpainters as MEX files; execute once on Linux in Matlab.
+% Run from the folder containing this script: all paths below are relative to it.
+fsep = '/';
+% Core sources: try ../../Core first (expected now that this script lives in src/Matlab/mex_compile), then the former ../../../Core.
+coreDir = ['..' fsep '..' fsep 'Core'];
+if ~exist(coreDir, 'dir'), coreDir = ['..' fsep '..' fsep '..' fsep 'Core']; end
+if ~exist(coreDir, 'dir'), error('Core sources not found relative to %s', pwd); end
+
+copyfile([coreDir fsep 'regularisers_CPU'], 'regularisers_CPU');
+copyfile([coreDir fsep 'CCPiDefines.h'], 'regularisers_CPU');
+copyfile([coreDir fsep 'inpainters_CPU'], 'regularisers_CPU');
+% Compile inside the staged folder, next to the copied core sources.
+cd regularisers_CPU
+
+Pathmove = ['..' fsep 'installed' fsep]; % destination of the compiled MEX binaries
+
+fprintf('%s \n', '<<<<<<<<<<<Compiling CPU regularisers>>>>>>>>>>>>>');
+% Each step below compiles one MEX gateway with its sources (OpenMP enabled via -fopenmp) and moves the binary to Pathmove.
+fprintf('%s \n', 'Compiling ROF-TV...');
+mex ROF_TV.c ROF_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('ROF_TV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling FGP-TV...');
+mex FGP_TV.c FGP_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('FGP_TV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling SB-TV...');
+mex SB_TV.c SB_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('SB_TV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling dFGP-TV...');
+mex FGP_dTV.c FGP_dTV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('FGP_dTV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling TNV...');
+mex TNV.c TNV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('TNV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling NonLinear Diffusion...');
+mex NonlDiff.c Diffusion_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('NonlDiff.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling Anisotropic diffusion of higher order...');
+mex Diffusion_4thO.c Diffus4th_order_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('Diffusion_4thO.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling TGV...');
+mex TGV.c TGV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('TGV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling ROF-LLT...');
+mex LLT_ROF.c LLT_ROF_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('LLT_ROF.mex*',Pathmove);
+% The NonLocal-TV step builds two MEX files: PatchSelect and Nonlocal_TV.
+fprintf('%s \n', 'Compiling NonLocal-TV...');
+mex PatchSelect.c PatchSelect_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex Nonlocal_TV.c Nonlocal_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('Nonlocal_TV.mex*',Pathmove);
+movefile('PatchSelect.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling additional tools...');
+mex TV_energy.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('TV_energy.mex*',Pathmove);
+
+%############Inpainters##############%
+fprintf('%s \n', 'Compiling Nonlinear/Linear diffusion inpainting...');
+mex NonlDiff_Inp.c Diffusion_Inpaint_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('NonlDiff_Inp.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling Nonlocal marching method for inpainting...');
+mex NonlocalMarching_Inpaint.c NonlocalMarching_Inpaint_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('NonlocalMarching_Inpaint.mex*',Pathmove);
+% Remove the staged copies of the core sources and the shared header.
+delete('SB_TV_core*', 'ROF_TV_core*', 'FGP_TV_core*', 'FGP_dTV_core*', 'TNV_core*', 'utils*', 'Diffusion_core*', 'Diffus4th_order_core*', 'TGV_core*', 'LLT_ROF_core*', 'CCPiDefines.h');
+delete('PatchSelect_core*', 'Nonlocal_TV_core*');
+delete('Diffusion_Inpaint_core*', 'NonlocalMarching_Inpaint_core*');
+fprintf('%s \n', '<<<<<<< Regularisers successfully compiled! >>>>>>>');
+% Finish in the demos folder, two levels above the current one.
+pathA2 = ['..' fsep '..' fsep];
+cd(pathA2);
+cd('demos');
diff --git a/src/Matlab/mex_compile/compileCPU_mex_WINDOWS.m b/src/Matlab/mex_compile/compileCPU_mex_WINDOWS.m
new file mode 100644
index 0000000..6f7541c
--- /dev/null
+++ b/src/Matlab/mex_compile/compileCPU_mex_WINDOWS.m
@@ -0,0 +1,135 @@
+% Build the CPU regularisers and inpainters as MEX files; execute once on Windows in Matlab
+
+% >>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+% I've been able to compile on Windows 7 with MinGW and Matlab 2016b, however,
+% not sure if openmp is enabled after the compilation.
+
+% Here I present two ways how software can be compiled, if you have some
+% other suggestions/remarks please contact me at dkazanc@hotmail.com
+% >>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+
+fsep = '/';
+% Core sources: try ../../Core first (expected now that this script lives in src/Matlab/mex_compile), then the former ../../../Core.
+coreDir = ['..' fsep '..' fsep 'Core'];
+if ~exist(coreDir, 'dir'), coreDir = ['..' fsep '..' fsep '..' fsep 'Core']; end
+if ~exist(coreDir, 'dir'), error('Core sources not found relative to %s', pwd); end
+
+copyfile([coreDir fsep 'regularisers_CPU'], 'regularisers_CPU');
+copyfile([coreDir fsep 'CCPiDefines.h'], 'regularisers_CPU');
+copyfile([coreDir fsep 'inpainters_CPU'], 'regularisers_CPU');
+% Compile inside the staged folder, next to the copied core sources.
+cd regularisers_CPU
+
+Pathmove = ['..' fsep 'installed' fsep]; % destination of the compiled MEX binaries
+
+fprintf('%s \n', '<<<<<<<<<<<Compiling CPU regularisers>>>>>>>>>>>>>');
+% Each step below compiles one MEX gateway with its sources (options passed through COMPFLAGS) and moves the binary to Pathmove.
+fprintf('%s \n', 'Compiling ROF-TV...');
+mex ROF_TV.c ROF_TV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('ROF_TV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling FGP-TV...');
+mex FGP_TV.c FGP_TV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('FGP_TV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling SB-TV...');
+mex SB_TV.c SB_TV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('SB_TV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling dFGP-TV...');
+mex FGP_dTV.c FGP_dTV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('FGP_dTV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling TNV...');
+mex TNV.c TNV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('TNV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling NonLinear Diffusion...');
+mex NonlDiff.c Diffusion_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('NonlDiff.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling Anisotropic diffusion of higher order...');
+mex Diffusion_4thO.c Diffus4th_order_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('Diffusion_4thO.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling TGV...');
+mex TGV.c TGV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('TGV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling ROF-LLT...');
+mex LLT_ROF.c LLT_ROF_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('LLT_ROF.mex*',Pathmove);
+% The NonLocal-TV step builds two MEX files: PatchSelect and Nonlocal_TV.
+fprintf('%s \n', 'Compiling NonLocal-TV...');
+mex PatchSelect.c PatchSelect_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+mex Nonlocal_TV.c Nonlocal_TV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('Nonlocal_TV.mex*',Pathmove);
+movefile('PatchSelect.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling additional tools...');
+mex TV_energy.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('TV_energy.mex*',Pathmove);
+
+%############Inpainters##############%
+fprintf('%s \n', 'Compiling Nonlinear/Linear diffusion inpainting...');
+mex NonlDiff_Inp.c Diffusion_Inpaint_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('NonlDiff_Inp.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling Nonlocal marching method for inpaiting...');
+mex NonlocalMarching_Inpaint.c NonlocalMarching_Inpaint_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('NonlocalMarching_Inpaint.mex*',Pathmove);
+
+
+%%
+%%% The second approach to compile using TDM-GCC which follows this
+%%% discussion:
+%%% https://uk.mathworks.com/matlabcentral/answers/279171-using-mingw-compiler-and-open-mp#comment_359122
+%%% 1. Install TDM-GCC independently from http://tdm-gcc.tdragon.net/ (I installed 5.1.0)
+%%% Install openmp version: http://sourceforge.net/projects/tdm-gcc/files/TDM-GCC%205%20series/5.1.0-tdm64-1/gcc-5.1.0-tdm64-1-openmp.zip/download
+%%% 2. Link to libgomp.a in that installation when compiling your mex file.
+
+%%% assuming you unzipped TDM GCC (OpenMp) in folder TDMGCC on C drive, uncomment
+%%% the lines below
+% fprintf('%s \n', 'Compiling CPU regularisers...');
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" ROF_TV.c ROF_TV_core.c utils.c
+% movefile('ROF_TV.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" FGP_TV.c FGP_TV_core.c utils.c
+% movefile('FGP_TV.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" SB_TV.c SB_TV_core.c utils.c
+% movefile('SB_TV.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" FGP_dTV.c FGP_dTV_core.c utils.c
+% movefile('FGP_dTV.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" TNV.c TNV_core.c utils.c
+% movefile('TNV.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" NonlDiff.c Diffusion_core.c utils.c
+% movefile('NonlDiff.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" Diffusion_4thO.c Diffus4th_order_core.c utils.c
+% movefile('Diffusion_4thO.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" TGV.c TGV_core.c utils.c
+% movefile('TGV.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" LLT_ROF.c LLT_ROF_core.c utils.c
+% movefile('LLT_ROF.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" PatchSelect.c PatchSelect_core.c utils.c
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" Nonlocal_TV.c Nonlocal_TV_core.c utils.c
+% movefile('Nonlocal_TV.mex*',Pathmove);
+% movefile('PatchSelect.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" TV_energy.c utils.c
+% movefile('TV_energy.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" NonlDiff_Inp.c Diffusion_Inpaint_core.c utils.c
+% movefile('NonlDiff_Inp.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" NonlocalMarching_Inpaint.c NonlocalMarching_Inpaint_core.c utils.c
+% movefile('NonlocalMarching_Inpaint.mex*',Pathmove);
+
+% Remove the staged copies of the core sources and the shared header (LLT_ROF_core* was missing from this list, so those files were left behind).
+delete SB_TV_core* ROF_TV_core* FGP_TV_core* FGP_dTV_core* TNV_core* utils* Diffusion_core* Diffus4th_order_core* TGV_core* LLT_ROF_core* CCPiDefines.h
+delete PatchSelect_core* Nonlocal_TV_core*
+delete Diffusion_Inpaint_core* NonlocalMarching_Inpaint_core*
+fprintf('%s \n', 'Regularisers successfully compiled!');
+
+
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%pathA2 = sprintf(['..' fsep '..' fsep], 1i);
+%cd(pathA2);
+%cd demos
diff --git a/src/Matlab/mex_compile/compileGPU_mex.m b/src/Matlab/mex_compile/compileGPU_mex.m
new file mode 100644
index 0000000..dd1475c
--- /dev/null
+++ b/src/Matlab/mex_compile/compileGPU_mex.m
@@ -0,0 +1,74 @@
+% Build the GPU (CUDA) regularisers as MEX files; execute this file in Matlab once
+
+%>>>>>>>>>>>>>>>>>Important<<<<<<<<<<<<<<<<<<<
+% In order to compile CUDA modules one needs to have nvcc-compiler
+% installed (see CUDA SDK), check it under MATLAB with !nvcc --version
+
+% The toolchain locations are collected in nvccBin and cudaRoot below
+% ! paths to the CUDA sdk can be different, modify accordingly !
+
+% Originally tested on Ubuntu 18.04/MATLAB 2016b/cuda10.0/gcc7.3
+
+% Installation HAS NOT been tested on Windows, please use the CMake build or
+% modify the code below accordingly
+fsep = '/';
+
+nvccBin  = '/usr/local/cuda/bin/nvcc'; % CUDA compiler driver
+cudaRoot = '/usr/local/cuda-10.0';     % its include/ and lib64/ are passed to mex
+% MEX headers of the running MATLAB; replaces the machine-specific
+% ~/SOFT/MATLAB9/extern/include/ that was hard-coded in every nvcc call.
+mexInc   = fullfile(matlabroot, 'extern', 'include');
+
+% Core sources: try ../../Core first (expected now that this script lives in src/Matlab/mex_compile), then the former ../../../Core.
+coreDir = ['..' fsep '..' fsep 'Core'];
+if ~exist(coreDir, 'dir'), coreDir = ['..' fsep '..' fsep '..' fsep 'Core']; end
+if ~exist(coreDir, 'dir'), error('Core sources not found relative to %s', pwd); end
+
+copyfile([coreDir fsep 'regularisers_GPU'], 'regularisers_GPU');
+copyfile([coreDir fsep 'CCPiDefines.h'], 'regularisers_GPU');
+
+% Compile inside the staged folder, next to the copied CUDA sources.
+cd regularisers_GPU
+
+Pathmove = ['..' fsep 'installed' fsep]; % destination of the compiled MEX binaries
+
+fprintf('%s \n', '<<<<<<<<<<<Compiling GPU regularisers (CUDA)>>>>>>>>>>>>>');
+
+% One row per module: label, CUDA core source (.cu) and MEX gateway (.cpp),
+% both given without extension.  The order is the original build order.
+modules = { ...
+    'ROF-TV',                                'TV_ROF_GPU_core',      'ROF_TV_GPU'; ...
+    'FGP-TV',                                'TV_FGP_GPU_core',      'FGP_TV_GPU'; ...
+    'SB-TV',                                 'TV_SB_GPU_core',       'SB_TV_GPU'; ...
+    'TGV',                                   'TGV_GPU_core',         'TGV_GPU'; ...
+    'dFGP-TV',                               'dTV_FGP_GPU_core',     'FGP_dTV_GPU'; ...
+    'NonLinear Diffusion',                   'NonlDiff_GPU_core',    'NonlDiff_GPU'; ...
+    'Anisotropic diffusion of higher order', 'Diffus_4thO_GPU_core', 'Diffusion_4thO_GPU'; ...
+    'ROF-LLT',                               'LLT_ROF_GPU_core',     'LLT_ROF_GPU' ...
+    };
+
+for k = 1:size(modules, 1)
+    [label, core, gateway] = modules{k, :};
+    fprintf('%s \n', ['Compiling ' label '...']);
+
+    % Step 1: nvcc compiles the CUDA core into a position-independent object.
+    cmd = sprintf('%s -O0 -c %s.cu -Xcompiler -fPIC -I"%s"', nvccBin, core, mexInc);
+    if system(cmd) ~= 0
+        % Stop here: mex would otherwise fail later on the missing object file.
+        error('nvcc failed for %s.cu', core);
+    end
+
+    % Step 2: mex builds the gateway and links the object with the CUDA libraries.
+    mex('-g', ['-I' cudaRoot fsep 'include'], ['-L' cudaRoot fsep 'lib64'], ...
+        '-lcudart', '-lcufft', '-lmwgpu', [gateway '.cpp'], [core '.o']);
+    movefile([gateway '.mex*'], Pathmove);
+end
+
+% Remove the staged CUDA sources, the object files and the shared header.
+delete TV_ROF_GPU_core* TV_FGP_GPU_core* TV_SB_GPU_core* dTV_FGP_GPU_core* NonlDiff_GPU_core* Diffus_4thO_GPU_core* TGV_GPU_core* LLT_ROF_GPU_core* CCPiDefines.h
+fprintf('%s \n', 'All successfully compiled!');
+
+% Finish in the demos folder, two levels above the current one.
+pathA2 = ['..' fsep '..' fsep];
+cd(pathA2);
+cd demos
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/installed/MEXed_files_location.txt b/src/Matlab/mex_compile/installed/MEXed_files_location.txt
new file mode 100644
index 0000000..e69de29
diff --git a/src/Matlab/mex_compile/regularisers_CPU/Diffusion_4thO.c b/src/Matlab/mex_compile/regularisers_CPU/Diffusion_4thO.c
new file mode 100644
index 0000000..66ea9be
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/Diffusion_4thO.c
@@ -0,0 +1,77 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "Diffus4th_order_core.h"
+
+/* C-OMP implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
+ * The minimisation is performed using explicit scheme. 
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Edge-preserving parameter (sigma) [REQUIRED]
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended [OPTIONAL, default 300]
+ * 5. tau - time-marching step for the explicit scheme [OPTIONAL, default 0.01]
+ *
+ * Output:
+ * [1] Regularized image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
+ */
+
+/* MEX gateway: validates the Matlab arguments, allocates the output array and
+ * runs Diffus4th_CPU_main on a single-precision 2D image or 3D volume. */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int number_of_dims, iter_numb;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, tau, sigma;
+    
+    /* Check the argument count first: no prhs[] entry may be read before this */
+    if ((nrhs < 3) || (nrhs > 5)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+    
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]);
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    sigma = (float) mxGetScalar(prhs[2]); /* Edge-preserving parameter */
+    iter_numb = 300; /* default iterations number */
+    tau = 0.01; /* default marching step parameter */
+    if (nrhs >= 4)  iter_numb = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs == 5)  tau =  (float) mxGetScalar(prhs[4]); /* marching step parameter */
+    
+    /* dim_array holds number_of_dims entries only, so dim_array[2] exists just for 3D input */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+    
+    /* output image/volume: same size and class as the input (mxGetData, as the data is single) */
+    plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL);
+    Output = (float *) mxGetData(plhs[0]);
+    
+    Diffus4th_CPU_main(Input, Output, lambda, sigma, iter_numb, tau, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/FGP_TV.c b/src/Matlab/mex_compile/regularisers_CPU/FGP_TV.c
new file mode 100644
index 0000000..642362f
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/FGP_TV.c
@@ -0,0 +1,97 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "FGP_TV_core.h"
+
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambdaPar - regularization parameter
+ * 3. Number of iterations
+ * 4. epsilon: tolerance constant
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: nonnegativity (0 is OFF by default)
+ * 7. print information: 0 (off) or 1 (on)
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+
+
+/* MEX gateway: validates the Matlab arguments, fills in the defaults of the
+ * optional ones, allocates the output array and runs TV_FGP_CPU_main. */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int number_of_dims, iter, methTV, printswitch, nonneg;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, epsil;
+    
+    /* Check the argument count before any prhs[] entry is read */
+    if ((nrhs < 2) || (nrhs > 7)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    
+    number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+    
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 300; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    nonneg = 0; /* default nonnegativity switch, off - 0 */
+    printswitch = 0; /*default print is switched, off - 0 */
+    
+    /* Optional arguments are positional: argument k is read whenever at least k were passed */
+    if (nrhs >= 3)  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if (nrhs >= 4)  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
+    if (nrhs >= 5)  {
+        /* mxArrayToString returns NULL for a non-char argument; reject that before strcmp */
+        char *penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        if ((penalty_type == NULL) || ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0))) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs >= 6)  {
+        nonneg = (int) mxGetScalar(prhs[5]);
+        if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
+    }
+    if (nrhs == 7)  {
+        printswitch = (int) mxGetScalar(prhs[6]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+    
+    /* dim_array holds number_of_dims entries only, so dim_array[2] exists just for 3D input */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+    
+    /*Handling Matlab output data: same size and class as the input */
+    Output = (float *) mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+    /* running the function */
+    TV_FGP_CPU_main(Input, Output, lambda, iter, epsil, methTV, nonneg, printswitch, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c b/src/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c
new file mode 100644
index 0000000..1a0c070
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c
@@ -0,0 +1,114 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "FGP_dTV_core.h"
+
+/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is being fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: nonnegativity (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab's codes and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+
+
+/* MEX gateway: validates the Matlab arguments (image and reference must agree in
+ * size), fills in optional defaults, allocates the output and runs dTV_FGP_CPU_main. */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int number_of_dims, iter, methTV, printswitch, nonneg;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    const mwSize *dim_array2;
+    float *Input, *InputRef, *Output=NULL, lambda, epsil, eta;
+    
+    /* Check the argument count before any prhs[] entry is read */
+    if ((nrhs < 3) || (nrhs > 9)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Reference(2D/3D), Regularization parameter, iterations number, tolerance, smoothing constant, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    if (mxGetClassID(prhs[1]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    
+    number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    dim_array2 = mxGetDimensions(prhs[1]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+    /* Equal rank is required before the extents can be compared entry by entry */
+    if ((int) mxGetNumberOfDimensions(prhs[1]) != number_of_dims) mexErrMsgTxt("The input images have different dimensionalities");
+    
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    InputRef  = (float *) mxGetData(prhs[1]); /* reference image (2D/3D) */
+    lambda =  (float) mxGetScalar(prhs[2]); /* regularization parameter */
+    iter = 300; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    eta = 0.01; /* default smoothing constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    nonneg = 0; /* default nonnegativity switch, off - 0 */
+    printswitch = 0; /*default print is switched, off - 0 */
+    
+    /* dim_array holds number_of_dims entries only, so dim_array[2] exists just for
+     * 3D input; the same holds for dim_array2 once the ranks are known to agree */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+    if (number_of_dims == 2) { if ((dimX != dim_array2[0]) || (dimY != dim_array2[1])) mexErrMsgTxt("The input images have different dimensionalities");}
+    if (number_of_dims == 3) { if ((dimX != dim_array2[0]) || (dimY != dim_array2[1]) || (dimZ != dim_array2[2])) mexErrMsgTxt("The input volumes have different dimensionalities");}
+    
+    /* Optional arguments are positional: argument k is read whenever at least k were passed */
+    if (nrhs >= 4)  iter = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs >= 5)  epsil =  (float) mxGetScalar(prhs[4]); /* tolerance constant */
+    if (nrhs >= 6)  eta =  (float) mxGetScalar(prhs[5]); /* smoothing constant for the gradient of InputRef */
+    if (nrhs >= 7)  {
+        /* mxArrayToString returns NULL for a non-char argument; reject that before strcmp */
+        char *penalty_type = mxArrayToString(prhs[6]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        if ((penalty_type == NULL) || ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0))) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs >= 8)  {
+        nonneg = (int) mxGetScalar(prhs[7]);
+        if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
+    }
+    if (nrhs == 9)  {
+        printswitch = (int) mxGetScalar(prhs[8]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+    
+    /*Handling Matlab output data: same size and class as the input */
+    Output = (float *) mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+    
+    /* running the function */
+    dTV_FGP_CPU_main(Input, InputRef, Output, lambda, iter, epsil, eta, methTV, nonneg, printswitch, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/LLT_ROF.c b/src/Matlab/mex_compile/regularisers_CPU/LLT_ROF.c
new file mode 100644
index 0000000..ab45446
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/LLT_ROF.c
@@ -0,0 +1,82 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "LLT_ROF_core.h"
+
+/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
+* 
+* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well. 
+* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase 
+* lambdaLLT starting with smaller values. 
+*
+* Input Parameters:
+* 1. U0 - original noisy image/volume
+* 2. lambdaROF - ROF-related regularisation parameter
+* 3. lambdaLLT - LLT-related regularisation parameter
+* 4. iter - iterations number (for both models) [OPTIONAL, default 250]
+* 5. tau - time-marching step [OPTIONAL, default 0.0025]
+*
+* Output:
+* Filtered/regularised image
+*
+* References: 
+* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
+* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+*/
+
+/* MEX gateway for the LLT-ROF regulariser: validates the MATLAB arguments, allocates the output and runs LLT_ROF_CPU_main */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int number_of_dims, iterationsNumb;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambdaROF, lambdaLLT, tau;
+
+    /* Validate the argument count before touching prhs[]: reading an argument that was not passed is undefined behaviour */
+    if ((nrhs < 3) || (nrhs > 5)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter (ROF), Regularisation parameter (LTT), iterations number, time-marching parameter");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    /* Output is only allocated for 2D/3D data, so reject anything else instead of passing NULL to the solver */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]);
+    lambdaROF =  (float) mxGetScalar(prhs[1]); /* ROF regularization parameter */
+    lambdaLLT =  (float) mxGetScalar(prhs[2]); /* LLT regularization parameter */
+    iterationsNumb = 250; /* default iterations number */
+    tau =  0.0025; /* default marching step */
+    if (nrhs >= 4) iterationsNumb =  (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs == 5) tau =  (float) mxGetScalar(prhs[4]); /* marching step parameter */
+
+    /*Handling Matlab output data*/
+    /* dim_array holds number_of_dims entries, so its third element is read only for a 3D input */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    /* output image/volume with the same dimensions as the input */
+    plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL);
+    Output = (float *) mxGetData(plhs[0]); /* mxGetData is the accessor for single data; mxGetPr is meant for double arrays */
+
+    LLT_ROF_CPU_main(Input, Output, lambdaROF, lambdaLLT, iterationsNumb, tau, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/NonlDiff.c b/src/Matlab/mex_compile/regularisers_CPU/NonlDiff.c
new file mode 100644
index 0000000..ec35b8b
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/NonlDiff.c
@@ -0,0 +1,89 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "Diffusion_core.h"
+
+/* C-OMP implementation of linear and nonlinear diffusion with the regularisation model [1] (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended  [OPTIONAL parameter]
+ * 5. tau - time-marching step for explicit scheme [OPTIONAL parameter]
+ * 6. Penalty type: 'Huber', 'PM' (Perona-Malik) or 'Tukey' (Tukey Biweight) [OPTIONAL parameter, default 'Huber']
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ */
+
+/* MEX gateway for linear/nonlinear diffusion: validates the MATLAB arguments, allocates the output and runs Diffusion_CPU_main */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int number_of_dims, iter_numb, penaltytype;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, tau, sigma;
+
+    /* Validate the argument count before touching prhs[]: reading an argument that was not passed is undefined behaviour */
+    if ((nrhs < 3) || (nrhs > 6)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant, penalty type - Huber, PM or Tukey");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    /* Output is only allocated for 2D/3D data, so reject anything else instead of passing NULL to the solver */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]);
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    sigma = (float) mxGetScalar(prhs[2]); /* Edge-preserving parameter */
+    iter_numb = 300; /* default iterations number */
+    tau = 0.025; /* default marching step parameter */
+    penaltytype = 1; /* Huber penalty by default */
+    if (nrhs >= 4)  iter_numb = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs >= 5)  tau =  (float) mxGetScalar(prhs[4]); /* marching step parameter */
+    if (nrhs == 6)  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[5]); /* Huber, PM or Tukey 'Huber' is the default */
+        /* mxArrayToString returns NULL when the argument is not a char array; strcmp on NULL would crash MATLAB */
+        if (penalty_type == NULL) mexErrMsgTxt("Choose penalty: 'Huber', 'PM' or 'Tukey',");
+        if (strcmp(penalty_type, "Huber") == 0)  penaltytype = 1;  /* enable 'Huber' penalty */
+        else if (strcmp(penalty_type, "PM") == 0)  penaltytype = 2;  /* enable Perona-Malik penalty */
+        else if (strcmp(penalty_type, "Tukey") == 0)  penaltytype = 3;  /* enable Tukey Biweight penalty */
+        else mexErrMsgTxt("Choose penalty: 'Huber', 'PM' or 'Tukey',");
+        mxFree(penalty_type);
+    }
+
+    /*Handling Matlab output data*/
+    /* dim_array holds number_of_dims entries, so its third element is read only for a 3D input */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL); /* output image/volume */
+    Output = (float *) mxGetData(plhs[0]);
+    Diffusion_CPU_main(Input, Output, lambda, sigma, iter_numb, tau, penaltytype, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/NonlDiff_Inp.c b/src/Matlab/mex_compile/regularisers_CPU/NonlDiff_Inp.c
new file mode 100644
index 0000000..9833392
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/NonlDiff_Inp.c
@@ -0,0 +1,103 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "Diffusion_Inpaint_core.h"
+
+/* C-OMP implementation of linear and nonlinear diffusion [1,2] for inpainting task (2D/3D case)
+ * The minimisation is performed using explicit scheme. 
+ *
+ * Input Parameters:
+ * 1. Image/volume to inpaint
+ * 2. Inpainting Mask of the same size as (1) in 'unsigned char' format  (ones mark the region to inpaint, zeros belong to the data)
+ * 3. lambda - regularization parameter
+ * 4. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
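+ * 5. Number of iterations, for explicit scheme >= 150 is recommended [OPTIONAL parameter, default 300]
+ * 6. tau - time-marching step for explicit scheme [OPTIONAL parameter, default 0.025]
+ * 7. Penalty type: 'Huber', 'PM' (Perona-Malik) or 'Tukey' (Tukey Biweight) [OPTIONAL parameter, default 'Huber']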
+ *
+ * Output:
+ * [1] Inpainted image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+/* MEX gateway for diffusion-based inpainting: validates image and mask, allocates the output and runs Diffusion_Inpaint_CPU_main */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int number_of_dims, iter_numb, penaltytype;
+    mwSize dimX, dimY, dimZ, i, total_elements, inpaint_elements;
+    const mwSize *dim_array;
+    const mwSize *dim_array2;
+
+    float *Input, *Output=NULL, lambda, tau, sigma;
+    unsigned char *Mask;
+
+    /* Validate the argument count before touching prhs[]: reading an argument that was not passed is undefined behaviour */
+    if ((nrhs < 4) || (nrhs > 7)) mexErrMsgTxt("At least 4 parameters is required, all parameters are: Image(2D/3D), Mask(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant, penalty type - Huber, PM or Tukey");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    if (mxGetClassID(prhs[1]) != mxUINT8_CLASS) {mexErrMsgTxt("The mask must be in uint8 precision");}
+
+    dim_array = mxGetDimensions(prhs[0]);
+    dim_array2 = mxGetDimensions(prhs[1]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+    /* A mask of a different rank cannot match the image; checking the rank first keeps the dim_array2[] reads below in bounds */
+    if (mxGetNumberOfDimensions(prhs[1]) != (mwSize) number_of_dims) mexErrMsgTxt("Input image and the provided mask are of different dimensions!");
+
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]);
+    Mask  = (unsigned char *) mxGetData(prhs[1]); /* MASK */
+    lambda =  (float) mxGetScalar(prhs[2]); /* regularization parameter */
+    sigma = (float) mxGetScalar(prhs[3]); /* Edge-preserving parameter */
+    iter_numb = 300; /* default iterations number */
+    tau = 0.025; /* default marching step parameter */
+    penaltytype = 1; /* Huber penalty by default */
+    if (nrhs >= 5)  iter_numb = (int) mxGetScalar(prhs[4]); /* iterations number */
+    if (nrhs >= 6)  tau =  (float) mxGetScalar(prhs[5]); /* marching step parameter */
+    if (nrhs == 7)  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[6]); /* Huber, PM or Tukey 'Huber' is the default */
+        /* mxArrayToString returns NULL when the argument is not a char array; strcmp on NULL would crash MATLAB */
+        if (penalty_type == NULL) mexErrMsgTxt("Choose penalty: 'Huber', 'PM' or 'Tukey',");
+        if (strcmp(penalty_type, "Huber") == 0)  penaltytype = 1;  /* enable 'Huber' penalty */
+        else if (strcmp(penalty_type, "PM") == 0)  penaltytype = 2;  /* enable Perona-Malik penalty */
+        else if (strcmp(penalty_type, "Tukey") == 0)  penaltytype = 3;  /* enable Tukey Biweight penalty */
+        else mexErrMsgTxt("Choose penalty: 'Huber', 'PM' or 'Tukey',");
+        mxFree(penalty_type);
+    }
+
+    /* dim_array holds number_of_dims entries, so its third element is read only for a 3D input */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+    if ((dimX != dim_array2[0]) || (dimY != dim_array2[1]) || ((number_of_dims == 3) && (dimZ != dim_array2[2]))) mexErrMsgTxt("Input image and the provided mask are of different dimensions!");
+
+    plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL); /* output image/volume */
+    Output = (float *) mxGetData(plhs[0]);
+    /* Count in mwSize: an int index/counter overflows once the volume holds more than INT_MAX elements */
+    total_elements = dimX*dimY*dimZ;
+    inpaint_elements = 0;
+    for (i=0; i<total_elements; i++) if (Mask[i] == 1) inpaint_elements++;
+    if (inpaint_elements == 0) mexErrMsgTxt("The mask is full of zeros, nothing to inpaint");
+    Diffusion_Inpaint_CPU_main(Input, Mask, Output, lambda, sigma, iter_numb, tau, penaltytype, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/NonlocalMarching_Inpaint.c b/src/Matlab/mex_compile/regularisers_CPU/NonlocalMarching_Inpaint.c
new file mode 100644
index 0000000..b3f2c98
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/NonlocalMarching_Inpaint.c
@@ -0,0 +1,84 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "NonlocalMarching_Inpaint_core.h"
+
+/* C-OMP implementation of Nonlocal Vertical Marching inpainting method (2D case)
+ * The method is heuristic but computationally efficient (especially for larger images).
+ * It was developed specifically to smoothly inpaint horizontal or inclined missing data regions in sinograms.
+ * The method WILL NOT work satisfactorily if you have lengthy vertical stripes of missing data
+ *
+ * Input:
+ * 1. 2D image or sinogram [REQUIRED]
+ * 2. Mask of the same size as A in 'unsigned char' format  (ones mark the region to inpaint, zeros belong to the data) [REQUIRED]
+ * 3. Linear increment to increase searching window size in iterations, values from 1-3 is a good choice [OPTIONAL, default 1]
+ * 4. Number of iterations [OPTIONAL, default - calculate based on the mask]
+ *
+ * Output:
+ * 1. Inpainted sinogram  
+ * 2. updated mask
+ * Reference: TBA
+ */
+
+/* MEX gateway for nonlocal vertical marching inpainting (2D only): validates the inputs and runs NonlocalMarching_Inpaint_main */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int number_of_dims, iterations, SW_increment;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    const mwSize *dim_array2;
+    mxArray *mask_out;
+
+    float *Input, *Output=NULL;
+    unsigned char *Mask, *Mask_upd=NULL;
+
+    /* Only the image and the mask are mandatory; the count is checked before any prhs[] entry is read */
+    if ((nrhs < 2) || (nrhs > 4)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Mask(2D/3D), Linear increment, Iterations number");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    if (mxGetClassID(prhs[1]) != mxUINT8_CLASS) {mexErrMsgTxt("The mask must be in uint8 precision");}
+
+    dim_array = mxGetDimensions(prhs[0]);
+    dim_array2 = mxGetDimensions(prhs[1]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    /* Outputs are only allocated for 2D data, so every other rank is rejected rather than passing NULL buffers on */
+    if (number_of_dims != 2) mexErrMsgTxt("Currently 2D supported only");
+
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]);
+    Mask  = (unsigned char *) mxGetData(prhs[1]); /* MASK */
+    SW_increment = 1; /* default linear increment */
+    iterations = 0; /* default iterations number */
+    if (nrhs >= 3)  SW_increment =  (int) mxGetScalar(prhs[2]); /* linear increment */
+    if (nrhs == 4)  iterations =  (int) mxGetScalar(prhs[3]); /* iterations number */
+
+    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = 1; /*2D case: dim_array has no third entry to read */
+    if ((dimX != dim_array2[0]) || (dimY != dim_array2[1])) mexErrMsgTxt("Input image and the provided mask are of different dimensions!");
+
+    /* output image and updated mask */
+    plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL);
+    Output = (float *) mxGetData(plhs[0]);
+    mask_out = mxCreateNumericArray(2, dim_array, mxUINT8_CLASS, mxREAL);
+    Mask_upd = (unsigned char *) mxGetData(mask_out);
+    /* plhs[1] is only handed back when a second output was requested; otherwise MATLAB reclaims mask_out when the call returns */
+    if (nlhs > 1) plhs[1] = mask_out;
+    NonlocalMarching_Inpaint_main(Input, Mask, Output, Mask_upd, SW_increment, iterations, 0, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c b/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
new file mode 100644
index 0000000..014c0a0
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
@@ -0,0 +1,88 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd. 
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "matrix.h"
+#include "mex.h"
+#include "Nonlocal_TV_core.h"
+
+#define EPS 1.0000e-9
+
+/* Matlab wrapper for C-OMP implementation of non-local regulariser
+ * Weights and associated indices must be given as an input.
+ * Gauss-Seidel fixed point iteration requires ~ 3 iterations, so the main effort
+ * goes in pre-calculation of weights and selection of patches
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. AR_i - indices of i neighbours
+ * 3. AR_j - indices of j neighbours
+ * 4. AR_k - indices of k neighbours (0 - for 2D case)
+ * 5. Weights_ij(k) - associated weights 
+ * 6. regularisation parameter
+ * 7. iterations number 
+ 
+ * Output:
+ * 1. denoised image/volume 	
+ * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
+ */
+
+/* MEX gateway for the non-local TV regulariser: validates the inputs, allocates the output and runs Nonlocal_TV_CPU_main */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    long number_of_dims,  dimX, dimY, dimZ;
+    int IterNumb, NumNeighb = 0;
+    unsigned short *H_i, *H_j, *H_k;
+    /* mxGetDimensions returns const mwSize*; reading it through an int* breaks wherever mwSize is wider than int */
+    const mwSize  *dim_array;
+    const mwSize  *dim_array2;
+    float *A_orig, *Output=NULL, *Weights, lambda;
+
+    /* All seven arguments are read below, so make sure they were actually passed */
+    if (nrhs < 7) mexErrMsgTxt("Seven inputs required: Image(2D/3D), AR_i, AR_j, AR_k, Weights, regularisation parameter, iterations number");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    dim_array2 = mxGetDimensions(prhs[1]);
+    number_of_dims = (long) mxGetNumberOfDimensions(prhs[0]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    A_orig  = (float *) mxGetData(prhs[0]); /* a 2D image or a set of 2D images (3D stack) */
+    H_i  = (unsigned short *) mxGetData(prhs[1]); /* indices of i neighbours */
+    H_j  = (unsigned short *) mxGetData(prhs[2]); /* indices of j neighbours */
+    H_k  = (unsigned short *) mxGetData(prhs[3]); /* indices of k neighbours */
+    Weights = (float *) mxGetData(prhs[4]); /* weights for patches */
+    lambda = (float) mxGetScalar(prhs[5]); /* regularisation parameter */
+    IterNumb = (int) mxGetScalar(prhs[6]); /* the number of iterations */
+
+    /* dim_array holds number_of_dims entries, so its third element is read only for 3D; 2D data is passed on with dimZ = 0 */
+    dimX = (long) dim_array[0]; dimY = (long) dim_array[1];
+    dimZ = (number_of_dims == 3) ? (long) dim_array[2] : 0;
+    /* Neighbours lie along the dimension after the image ones; MATLAB drops that dimension when it is a trailing singleton */
+    NumNeighb = ((long) mxGetNumberOfDimensions(prhs[1]) > number_of_dims) ? (int) dim_array2[number_of_dims] : 1;
+
+    plhs[0] = mxCreateNumericArray((mwSize) number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL);
+    Output = (float *) mxGetData(plhs[0]);
+    /* run the main function here */
+    Nonlocal_TV_CPU_main(A_orig, Output, H_i, H_j, H_k, Weights, dimX, dimY, dimZ, NumNeighb, lambda, IterNumb);
+}
diff --git a/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c b/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
new file mode 100644
index 0000000..f942539
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
@@ -0,0 +1,92 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd. 
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "matrix.h"
+#include "mex.h"
+#include "PatchSelect_core.h"
+
+/* C-OMP implementation of non-local weight pre-calculation for non-local priors
+ * Weights and associated indices are stored into pre-allocated arrays and passed
+ * to the regulariser
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
+ * 3. Similarity window (half-size of the patch window, e.g. 2)
+ * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
+ * 5. noise-related parameter to calculate non-local weights
+ *
+ * Output [2D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. Weights_ij - associated weights
+ *
+ * Output [3D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. AR_k - indices of k neighbours
+ * 4. Weights_ijk - associated weights
+ */
+/**************************************************/
+/* MEX gateway for non-local patch selection: validates the inputs, allocates index/weight arrays and runs PatchSelect_CPU_main */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int number_of_dims,  SearchWindow, SimilarWin, NumNeighb, n_out, k;
+    mwSize dimX, dimY, dimZ;
+    unsigned short *H_i=NULL, *H_j=NULL, *H_k=NULL;
+    const mwSize  *dim_array; /* mxGetDimensions returns const mwSize*, which may be wider than int */
+    float *A, *Weights = NULL, h;
+    mwSize out_dims[4]; /* image dimensions followed by the number of neighbours; mxCreateNumericArray expects mwSize */
+    mxArray *out[4];
+
+    /* All five arguments are read below, so make sure they were actually passed */
+    if (nrhs < 5) mexErrMsgTxt("Five inputs required: Image(2D/3D), searching window, similarity window, number of neighbours, noise-related parameter");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+    /*Handling Matlab input data*/
+    A  = (float *) mxGetData(prhs[0]); /* a 2D or 3D image/volume */
+    SearchWindow = (int) mxGetScalar(prhs[1]);    /* Large Searching window */
+    SimilarWin = (int) mxGetScalar(prhs[2]);    /* Similarity window (patch-search)*/
+    NumNeighb = (int) mxGetScalar(prhs[3]); /* the total number of neighbours to take */
+    h = (float) mxGetScalar(prhs[4]); /* NLM parameter */
+    if (NumNeighb < 1) mexErrMsgTxt("The number of neighbours must be positive");
+
+    /* dim_array holds number_of_dims entries, so its third element is read only for 3D; 2D data is passed on with dimZ = 0 */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 0;
+    for (k = 0; k < number_of_dims; k++) out_dims[k] = dim_array[k];
+    out_dims[number_of_dims] = (mwSize) NumNeighb;
+
+    /* 2D: H_i, H_j, Weights; 3D: H_i, H_j, H_k, Weights - the index arrays are uint16, the weights (last output) are single */
+    n_out = number_of_dims + 1;
+    for (k = 0; k < n_out; k++) out[k] = mxCreateNumericArray(number_of_dims + 1, out_dims, (k == n_out - 1) ? mxSINGLE_CLASS : mxUINT16_CLASS, mxREAL);
+    H_i = (unsigned short *) mxGetData(out[0]); H_j = (unsigned short *) mxGetData(out[1]);
+    if (number_of_dims == 3) H_k = (unsigned short *) mxGetData(out[2]);
+    Weights = (float *) mxGetData(out[n_out - 1]);
+    /* Only outputs the caller requested (plus plhs[0]) are handed back; MATLAB reclaims the rest when the call returns */
+    for (k = 0; k < n_out; k++) if ((k == 0) || (k < nlhs)) plhs[k] = out[k];
+    PatchSelect_CPU_main(A, H_i, H_j, H_k, Weights, (long)(dimX), (long)(dimY), (long)(dimZ), SearchWindow, SimilarWin, NumNeighb, h, 0);
+}
diff --git a/src/Matlab/mex_compile/regularisers_CPU/ROF_TV.c b/src/Matlab/mex_compile/regularisers_CPU/ROF_TV.c
new file mode 100644
index 0000000..55ef2b1
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/ROF_TV.c
@@ -0,0 +1,77 @@
+
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "ROF_TV_core.h"
+
+/* ROF-TV denoising/regularization model [1] (2D/3D case)
+ * (MEX wrapper for MATLAB)
+ * 
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Number of iterations, for explicit scheme >= 150 is recommended  [REQUIRED]
+ * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
+ *
+ * Output:
+ * [1] Regularized image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+ *
+ * D. Kazantsev, 2016-18
+ */
+
+/* MEX gateway for ROF-TV denoising: validates the MATLAB arguments, allocates the output and runs TV_ROF_CPU_main */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int number_of_dims, iter_numb;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array_i;
+    float *Input, *Output=NULL, lambda, tau;
+
+    /* Validate the argument count before touching prhs[]: reading an argument that was not passed is undefined behaviour */
+    if(nrhs != 4) mexErrMsgTxt("Four inputs reqired: Image(2D,3D), regularization parameter, iterations number,  marching step constant");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array_i = mxGetDimensions(prhs[0]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    /* Output is only allocated for 2D/3D data, so reject anything else instead of passing NULL to the solver */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]);
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter_numb =  (int) mxGetScalar(prhs[2]); /* iterations number */
+    tau =  (float) mxGetScalar(prhs[3]); /* marching step parameter */
+
+    /*Handling Matlab output data*/
+    /* dim_array_i holds number_of_dims entries, so its third element is read only for a 3D input */
+    dimX = dim_array_i[0]; dimY = dim_array_i[1];
+    dimZ = (number_of_dims == 3) ? dim_array_i[2] : 1;
+
+    /* output image/volume with the same dimensions as the input */
+    plhs[0] = mxCreateNumericArray(number_of_dims, dim_array_i, mxSINGLE_CLASS, mxREAL);
+    Output = (float *) mxGetData(plhs[0]);
+
+    TV_ROF_CPU_main(Input, Output, lambda, iter_numb, tau, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/SB_TV.c b/src/Matlab/mex_compile/regularisers_CPU/SB_TV.c
new file mode 100644
index 0000000..8636322
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/SB_TV.c
@@ -0,0 +1,91 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "SB_TV_core.h"
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. epsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+* 6. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* This function is based on the Matlab's code and paper by
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway for Split Bregman TV: validates the MATLAB inputs (image, lambda and
+     * up to four optional settings), allocates the output and calls SB_TV_CPU_main. */
+    int number_of_dims, iter, methTV, printswitch;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, epsil;
+
+    /*Handling Matlab input data*/
+    /* the argument count is checked before any prhs[] entry is touched */
+    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), print switch");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    /* reject any other rank so that Output can never reach the core as NULL */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input image must be 2D or 3D");
+
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 100; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    printswitch = 0; /*default print is switched, off - 0 */
+
+    /* optional arguments are positional: argument k is read whenever nrhs >= k */
+    if (nrhs >= 3)  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if (nrhs >= 4)  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
+    if (nrhs >= 5)  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        /* mxArrayToString yields NULL for a non-char argument; never hand that to strcmp */
+        if ((penalty_type == NULL) || ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0))) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs == 6)  {
+        printswitch = (int) mxGetScalar(prhs[5]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1; /* dim_array has no third entry in the 2D case */
+    Output = (float*) mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+    /* running the function */
+    SB_TV_CPU_main(Input, Output, lambda, iter, epsil, methTV, printswitch, dimX, dimY, dimZ);
+}
diff --git a/src/Matlab/mex_compile/regularisers_CPU/TGV.c b/src/Matlab/mex_compile/regularisers_CPU/TGV.c
new file mode 100644
index 0000000..aa4eed4
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/TGV.c
@@ -0,0 +1,83 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "mex.h"
+#include "TGV_core.h"
+
+/* C-OMP implementation of Primal-Dual denoising method for 
+ * Total Generalized Variation (TGV)-L2 model [1] (2D/3D)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume (2D/3D)
+ * 2. lambda - regularisation parameter
+ * 3. parameter to control the first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations
+ * 6. Lipshitz constant (default is 12)
+ *
+ * Output:
+ * Filtered/regularised image 
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway for TGV: validates the MATLAB inputs (image, lambda and up to four
+     * optional settings), allocates the output and calls TGV_main. */
+    int number_of_dims, iter;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, alpha0, alpha1, L2;
+
+    /*Handling Matlab input data*/
+    /* count is checked before prhs[] is touched; the message follows the order arguments are read in */
+    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, alpha1, alpha0, iterations number, Lipshitz Constant");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    /* reject any other rank so that Output can never reach the core as NULL */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input image must be 2D or 3D");
+
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image/volume */
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularisation parameter */
+    alpha1 =  1.0f; /* default parameter to control the first-order term */
+    alpha0 =  0.5f; /* default parameter to control the second-order term */
+    iter =  300; /* default iterations number */
+    L2 =  12.0f; /* default Lipshitz constant */
+
+    /* optional arguments are positional: argument k is read whenever nrhs >= k */
+    if (nrhs >= 3)  alpha1 =  (float) mxGetScalar(prhs[2]); /* parameter to control the first-order term */
+    if (nrhs >= 4)  alpha0 =  (float) mxGetScalar(prhs[3]);  /* parameter to control the second-order term */
+    if (nrhs >= 5)  iter =  (int) mxGetScalar(prhs[4]); /* Iterations number */
+    if (nrhs == 6)  L2 =  (float) mxGetScalar(prhs[5]); /* Lipshitz constant */
+
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1; /* dim_array has no third entry in the 2D case */
+    Output = (float*) mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+    /* running the function */
+    TGV_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY, dimZ);
+}
diff --git a/src/Matlab/mex_compile/regularisers_CPU/TNV.c b/src/Matlab/mex_compile/regularisers_CPU/TNV.c
new file mode 100644
index 0000000..acea75d
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/TNV.c
@@ -0,0 +1,74 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "TNV_core.h"
+/*
+ * C-OMP implementation of Total Nuclear Variation regularisation model (2D + channels) [1]
+ * The code is modified from the implementation by Joan Duran <joan.duran@uib.es> see
+ * "denoisingPDHG_ipol.cpp" in Joans Collaborative Total Variation package
+ *
+ * Input Parameters:
+ * 1. Noisy volume of 2D + channel dimension, i.e. 3D volume
+ * 2. lambda - regularisation parameter
+ * 3. Number of iterations [OPTIONAL parameter]
+ * 4. epsilon - tolerance constant [OPTIONAL parameter]
+ * 5. print information: 0 (off) or 1 (on)  [NOTE: not read by this gateway, which accepts at most 4 inputs]
+ *
+ * Output:
+ * 1. Filtered/regularized image
+ *
+ * [1]. Duran, J., Moeller, M., Sbert, C. and Cremers, D., 2016. Collaborative total variation: a general framework for vectorial TV models. SIAM Journal on Imaging Sciences, 9(1), pp.116-151.
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway for TNV: validates the MATLAB inputs (3D volume, lambda, optional
+     * iterations and tolerance), allocates the output and calls TNV_CPU_main. */
+    int number_of_dims, iter;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, epsil;
+
+    /*Handling Matlab input data*/
+    /* the argument count is checked before any prhs[] entry is touched */
+    if ((nrhs < 2) || (nrhs > 4)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D + channels), Regularisation parameter, Regularization parameter, iterations number, tolerance");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    /* exactly three dimensions are required; this also guards the dim_array[2] read below */
+    if (number_of_dims != 3) mexErrMsgTxt("The input must be 3D: [X,Y,Channels]");
+
+    Input  = (float *) mxGetData(prhs[0]); /* noisy sequence of channels (2D + channels) */
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 1000; /* default iterations number */
+    epsil = 1.00e-05; /* default tolerance constant */
+
+    if (nrhs >= 3)  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if (nrhs == 4)  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
+
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
+    Output = (float*) mxGetData(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+    /* running the function */
+    TNV_CPU_main(Input, Output, lambda, iter, epsil, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/TV_energy.c b/src/Matlab/mex_compile/regularisers_CPU/TV_energy.c
new file mode 100644
index 0000000..d457f46
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/TV_energy.c
@@ -0,0 +1,72 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "utils.h"
+/*
+ * Function to calculate TV energy value with respect to the denoising variational problem
+ * 
+ * Input:
+ * 1. Denoised Image/volume
+ * 2. Original (noisy) Image/volume
+ * 3. lambda - regularisation parameter 
+ * 4. type - type of energy (integer, forwarded to TV_energy2D/TV_energy3D)
+ * Output:
+ * 1. Energy function value
+ * 
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway for the TV energy: validates the four MATLAB inputs and writes the
+     * value computed by TV_energy2D / TV_energy3D into a 1x1 single-precision output. */
+    int number_of_dims, type, k;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array, *dim_array0;
+    float *Input, *Input0, *funcvalA, lambda;
+
+    /*Handling Matlab input data (the count is checked before prhs[] is touched) */
+    if ((nrhs != 4)) mexErrMsgTxt("4 inputs: Two images or volumes of the same size required, estimated and the original (noisy), regularisation parameter, type");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    if (mxGetClassID(prhs[1]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    dim_array0 = mxGetDimensions(prhs[1]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input image must be 2D or 3D");
+    /* both buffers are handed to the core with the same extents, so their shapes must agree */
+    if ((int) mxGetNumberOfDimensions(prhs[1]) != number_of_dims) mexErrMsgTxt("The two input images/volumes must have the same dimensions");
+    for (k = 0; k < number_of_dims; k++) if (dim_array[k] != dim_array0[k]) mexErrMsgTxt("The two input images/volumes must have the same dimensions");
+
+    Input  = (float *) mxGetData(prhs[0]); /* Denoised Image/volume */
+    Input0  = (float *) mxGetData(prhs[1]); /* Original (noisy) Image/volume */
+    lambda =  (float) mxGetScalar(prhs[2]); /* regularisation parameter */
+    type =  (int) mxGetScalar(prhs[3]); /* type of energy */
+
+    /*output energy function value */
+    plhs[0] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL);
+    funcvalA = (float *) mxGetData(plhs[0]);
+
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1; /* dim_array has no third entry in the 2D case */
+    if (number_of_dims == 2) TV_energy2D(Input, Input0, funcvalA, lambda, type, dimX, dimY);
+    else TV_energy3D(Input, Input0, funcvalA, lambda, type, dimX, dimY, dimZ);
+}
diff --git a/src/Matlab/mex_compile/regularisers_GPU/Diffusion_4thO_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/Diffusion_4thO_GPU.cpp
new file mode 100644
index 0000000..0cc042b
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/Diffusion_4thO_GPU.cpp
@@ -0,0 +1,77 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "Diffus_4thO_GPU_core.h"
+
+/* CUDA implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
+ * The minimisation is performed using explicit scheme. 
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Edge-preserving parameter (sigma) [REQUIRED]
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended [OPTIONAL, default 300]
+ * 5. tau - time-marching step for the explicit scheme [OPTIONAL, default 0.01]
+ *
+ * Output:
+ * [1] Regularized image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway for fourth-order diffusion (GPU): validates the MATLAB inputs,
+     * allocates the output with the input's shape and calls Diffus4th_GPU_main. */
+    int number_of_dims, iter_numb;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, tau, sigma;
+
+    /* check the argument count first: prhs[] entries beyond nrhs must not be read */
+    if ((nrhs < 3) || (nrhs > 5)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    /* reject any other rank so that Output can never reach the core as NULL */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input image must be 2D or 3D");
+
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]);
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    sigma = (float) mxGetScalar(prhs[2]); /* Edge-preserving parameter */
+    iter_numb = 300; /* default iterations number */
+    tau = 0.01; /* default marching step parameter */
+    if (nrhs >= 4)  iter_numb = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs == 5)  tau =  (float) mxGetScalar(prhs[4]); /* marching step parameter */
+
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1; /* dim_array has no third entry in the 2D case */
+
+    /* output image/volume, same shape as the input */
+    Output = (float*) mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+    Diffus4th_GPU_main(Input, Output, lambda, sigma, iter_numb, tau, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_GPU/FGP_TV_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/FGP_TV_GPU.cpp
new file mode 100644
index 0000000..c174e75
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/FGP_TV_GPU.cpp
@@ -0,0 +1,97 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "TV_FGP_GPU_core.h"
+
+/* GPU (CUDA) implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambdaPar - regularization parameter
+ * 3. Number of iterations
+ * 4. epsilon: tolerance constant
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: 'nonnegativity (0 is OFF by default)
+ * 7. print information: 0 (off) or 1 (on)
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway for FGP-TV (GPU): validates the MATLAB inputs (image, lambda and up to
+     * five optional settings), allocates the output and calls TV_FGP_GPU_main. */
+    int number_of_dims, iter, methTV, printswitch, nonneg;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, epsil;
+
+    /*Handling Matlab input data*/
+    /* the argument count is checked before any prhs[] entry is touched */
+    if ((nrhs < 2) || (nrhs > 7)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    /* reject any other rank so that Output can never reach the core as NULL */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input image must be 2D or 3D");
+
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 300; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    nonneg = 0; /* default nonnegativity switch, off - 0 */
+    printswitch = 0; /*default print is switched, off - 0 */
+
+    /* optional arguments are positional: argument k is read whenever nrhs >= k */
+    if (nrhs >= 3)  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if (nrhs >= 4)  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
+    if (nrhs >= 5)  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        /* mxArrayToString yields NULL for a non-char argument; never hand that to strcmp */
+        if ((penalty_type == NULL) || ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0))) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs >= 6)  {
+        nonneg = (int) mxGetScalar(prhs[5]);
+        if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
+    }
+    if (nrhs == 7)  {
+        printswitch = (int) mxGetScalar(prhs[6]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1; /* dim_array has no third entry in the 2D case */
+    Output = (float*) mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+    /* running the function */
+    TV_FGP_GPU_main(Input, Output, lambda, iter, epsil, methTV, nonneg, printswitch, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp
new file mode 100644
index 0000000..3f5a4b3
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp
@@ -0,0 +1,113 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "dTV_FGP_GPU_core.h"
+
+/* CUDA implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is being fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab's codes and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway for FGP-dTV (GPU): validates the MATLAB inputs (image, reference, lambda
+     * and up to six optional settings), allocates the output and calls dTV_FGP_GPU_main. */
+    int number_of_dims, iter, methTV, printswitch, nonneg;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    const mwSize *dim_array2;
+    float *Input, *InputRef, *Output=NULL, lambda, epsil, eta;
+
+    /*Handling Matlab input data*/
+    /* the argument count is checked before any prhs[] entry is touched */
+    if ((nrhs < 3) || (nrhs > 9)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Reference(2D/3D), Regularization parameter, iterations number, tolerance, smoothing constant, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    if (mxGetClassID(prhs[1]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    dim_array2 = mxGetDimensions(prhs[1]);
+    /* reject any other rank so that Output can never reach the core as NULL */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input image must be 2D or 3D");
+    /* the reference must have the same rank before its extents are compared entry by entry */
+    if ((int) mxGetNumberOfDimensions(prhs[1]) != number_of_dims) mexErrMsgTxt("The input images have different dimensionalities");
+
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    InputRef  = (float *) mxGetData(prhs[1]); /* reference image (2D/3D) */
+    lambda =  (float) mxGetScalar(prhs[2]); /* regularization parameter */
+    iter = 300; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    eta = 0.01; /* default smoothing constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    nonneg = 0; /* default nonnegativity switch, off - 0 */
+    printswitch = 0; /*default print is switched, off - 0 */
+
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1; /* dim_array has no third entry in the 2D case */
+    if (number_of_dims == 2) { if ((dimX != dim_array2[0]) || (dimY != dim_array2[1])) mexErrMsgTxt("The input images have different dimensionalities");}
+    if (number_of_dims == 3) { if ((dimX != dim_array2[0]) || (dimY != dim_array2[1]) || (dimZ != dim_array2[2])) mexErrMsgTxt("The input volumes have different dimensionalities");}
+
+    /* optional arguments are positional: argument k is read whenever nrhs >= k */
+    if (nrhs >= 4)  iter = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs >= 5)  epsil =  (float) mxGetScalar(prhs[4]); /* tolerance constant */
+    if (nrhs >= 6)  eta =  (float) mxGetScalar(prhs[5]); /* smoothing constant for the gradient of InputRef */
+    if (nrhs >= 7)  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[6]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        /* mxArrayToString yields NULL for a non-char argument; never hand that to strcmp */
+        if ((penalty_type == NULL) || ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0))) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs >= 8)  {
+        nonneg = (int) mxGetScalar(prhs[7]);
+        if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
+    }
+    if (nrhs == 9)  {
+        printswitch = (int) mxGetScalar(prhs[8]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+
+    Output = (float*) mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+    /* running the function */
+    dTV_FGP_GPU_main(Input, InputRef, Output, lambda, iter, epsil, eta, methTV, nonneg, printswitch, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp
new file mode 100644
index 0000000..e8da4ce
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp
@@ -0,0 +1,83 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "LLT_ROF_GPU_core.h"
+
+/* CUDA implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
+* 
+* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well. 
+* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase 
+* lambdaLLT starting with smaller values. 
+*
+* Input Parameters:
+* 1. U0 - original noise image/volume
+* 2. lambdaROF - ROF-related regularisation parameter
+* 3. lambdaLLT - LLT-related regularisation parameter
+* 4. iter - iterations number (for both models) [OPTIONAL, default 250]
+* 5. tau - time-marching step [OPTIONAL, default 0.0025]
+*
+* Output:
+* Filtered/regularised image
+*
+* References: 
+* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
+* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+*/
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway for LLT-ROF (GPU): validates the MATLAB inputs, allocates the output
+     * with the input's shape and calls LLT_ROF_GPU_main. */
+    int number_of_dims, iterationsNumb;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambdaROF, lambdaLLT, tau;
+
+    /* check the argument count first: prhs[] entries beyond nrhs must not be read */
+    if ((nrhs < 3) || (nrhs > 5)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter (ROF), Regularisation parameter (LTT), iterations number, time-marching parameter");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    /* reject any other rank so that Output can never reach the core as NULL */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input image must be 2D or 3D");
+
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]);
+    lambdaROF =  (float) mxGetScalar(prhs[1]); /* ROF regularization parameter */
+    lambdaLLT =  (float) mxGetScalar(prhs[2]); /* LLT regularization parameter */
+    iterationsNumb = 250; /* default iterations number */
+    tau =  0.0025; /* default marching step parameter */
+
+    if (nrhs >= 4) iterationsNumb =  (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs == 5) tau =  (float) mxGetScalar(prhs[4]); /* marching step parameter */
+
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1; /* dim_array has no third entry in the 2D case */
+
+    /* output image/volume, same shape as the input */
+    Output = (float*) mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+    LLT_ROF_GPU_main(Input, Output, lambdaROF, lambdaLLT, iterationsNumb, tau, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_GPU/NonlDiff_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/NonlDiff_GPU.cpp
new file mode 100644
index 0000000..1cd0cdc
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/NonlDiff_GPU.cpp
@@ -0,0 +1,92 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include <stdio.h>
+#include <string.h>
+#include "NonlDiff_GPU_core.h"
+
+/* CUDA implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
+ * The minimisation is performed using explicit scheme. 
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume 
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended 
+ * 5. tau - time-marching step for explicit scheme
+ * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
+ *
+ * Output:
+ * [1] Regularized image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    /* MEX gateway: validates the MATLAB arguments, allocates the single-precision
+     * output in plhs[0] and runs NonlDiff_GPU_main on the 2D/3D input in prhs[0]. */
+    int number_of_dims, iter_numb, penaltytype;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, tau, sigma;
+
+    /* Check the argument count before any prhs[] element is dereferenced */
+    if ((nrhs < 3) || (nrhs > 6)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant, penalty type - Huber, PM or Tukey");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    /* Reject other ranks here; Output would otherwise stay NULL for the GPU call */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]);
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    sigma = (float) mxGetScalar(prhs[2]); /* Edge-preserving parameter */
+    iter_numb = 300; /* iterations number */
+    tau = 0.025; /* marching step parameter */
+    penaltytype = 1; /* Huber penalty by default */
+
+    if (nrhs >= 4)  iter_numb = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs >= 5)  tau =  (float) mxGetScalar(prhs[4]); /* marching step parameter */
+    if (nrhs == 6)  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[5]); /* Huber, PM or Tukey 'Huber' is the default */
+        /* mxArrayToString returns NULL for non-char input; report that instead of passing NULL to strcmp */
+        if ((penalty_type == NULL) || ((strcmp(penalty_type, "Huber") != 0) && (strcmp(penalty_type, "PM") != 0) && (strcmp(penalty_type, "Tukey") != 0))) mexErrMsgTxt("Choose penalty: 'Huber', 'PM' or 'Tukey',");
+        if (strcmp(penalty_type, "Huber") == 0)  penaltytype = 1;  /* enable 'Huber' penalty */
+        if (strcmp(penalty_type, "PM") == 0)  penaltytype = 2;  /* enable Perona-Malik penalty */
+        if (strcmp(penalty_type, "Tukey") == 0)  penaltytype = 3;  /* enable Tukey Biweight penalty */
+        mxFree(penalty_type);
+    }
+
+    /* dim_array holds number_of_dims entries, so index 2 is read only for volumes */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    /* output image/volume, same size as the input (mxGetData: the array is single, not double) */
+    Output = (float*)mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+    NonlDiff_GPU_main(Input, Output, lambda, sigma, iter_numb, tau, penaltytype, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_GPU/ROF_TV_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/ROF_TV_GPU.cpp
new file mode 100644
index 0000000..bd01d55
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/ROF_TV_GPU.cpp
@@ -0,0 +1,74 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "TV_ROF_GPU_core.h"
+
+/* ROF-TV denoising/regularization model [1] (2D/3D case)
+ * (MEX wrapper for MATLAB)
+ * 
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Number of iterations, for explicit scheme >= 150 is recommended  [REQUIRED]
+ * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
+ *
+ * Output:
+ * [1] Regularized image/volume 
+ *
+ * This function is based on the paper by
+ * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+ *
+ * D. Kazantsev, 2016-18
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    /* MEX gateway: validates the four MATLAB inputs, allocates the single-precision
+     * output in plhs[0] and runs TV_ROF_GPU_main on the 2D/3D input in prhs[0]. */
+    int number_of_dims, iter_numb;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, tau;
+
+    /* Check the argument count before any prhs[] element is dereferenced */
+    if(nrhs != 4) mexErrMsgTxt("Four inputs reqired: Image(2D,3D), regularization parameter, iterations number,  marching step constant");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    /* Reject other ranks here; Output would otherwise stay NULL for the GPU call */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]);
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter_numb =  (int) mxGetScalar(prhs[2]); /* iterations number */
+    tau =  (float) mxGetScalar(prhs[3]); /* marching step parameter */
+
+    /* dim_array holds number_of_dims entries, so index 2 is read only for volumes */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    /* output image/volume, same size as the input (mxGetData: the array is single, not double) */
+    Output = (float*)mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+    TV_ROF_GPU_main(Input, Output, lambda, iter_numb, tau, dimX, dimY, dimZ);
+}
\ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp
new file mode 100644
index 0000000..9d1328f
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp
@@ -0,0 +1,91 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "TV_SB_GPU_core.h"
+
+/* CUDA mex-file for implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. epsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+* 6. print information: 0 (off) or 1 (on)  [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* This function is based on the Matlab's code and paper by
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    /* MEX gateway: validates the MATLAB arguments, allocates the single-precision
+     * output in plhs[0] and runs TV_SB_GPU_main on the 2D/3D input in prhs[0]. */
+    int number_of_dims, iter, methTV, printswitch;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, epsil;
+
+    /* Check the argument count before any prhs[] element is dereferenced */
+    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), print switch");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    /* Reject other ranks here; Output would otherwise stay NULL for the GPU call */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 100; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    printswitch = 0; /*default print is switched, off - 0 */
+
+    if (nrhs >= 3)  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if (nrhs >= 4)  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
+    if (nrhs >= 5)  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        /* mxArrayToString returns NULL for non-char input; report that instead of passing NULL to strcmp */
+        if ((penalty_type == NULL) || ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0))) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs == 6)  {
+        printswitch = (int) mxGetScalar(prhs[5]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+
+    /* dim_array holds number_of_dims entries, so index 2 is read only for volumes */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    /* output image/volume, same size as the input (mxGetData: the array is single, not double) */
+    Output = (float*)mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+    /* running the function */
+    TV_SB_GPU_main(Input, Output, lambda, iter, epsil, methTV, printswitch, dimX, dimY, dimZ);
+}
diff --git a/src/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp
new file mode 100644
index 0000000..edb551d
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp
@@ -0,0 +1,79 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "mex.h"
+#include "TGV_GPU_core.h"
+
+/* CUDA implementation of Primal-Dual denoising method for 
+ * Total Generalized Variation (TGV)-L2 model [1] (2D case only)
+ *
+ * Input Parameters:
+ * 1. Noisy image (2D) (required)
+ * 2. lambda - regularisation parameter (required)
+ * 3. parameter to control the first-order term (alpha1) (default - 1)
+ * 4. parameter to control the second-order term (alpha0) (default - 0.5)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations (default is 300)
+ * 6. Lipschitz constant (default is 12)
+ *
+ * Output:
+ * Filtered/regularised image
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    /* MEX gateway: validates the MATLAB arguments, allocates the single-precision
+     * output in plhs[0] and runs TGV_GPU_main on the 2D input in prhs[0]. */
+    int number_of_dims, iter;
+    mwSize dimX, dimY;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, alpha0, alpha1, L2;
+
+    /* Check nrhs before any prhs[] access; the message lists the parameters in the order they are read */
+    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D), Regularisation parameter, alpha1, alpha0, iterations number, Lipshitz Constant");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    /* Fail for every non-2D rank (not only 3D) so plhs[0] is never left unassigned */
+    if (number_of_dims != 2) {mexErrMsgTxt("Only 2D images accepted");}
+
+    /*Handling Matlab input data*/
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D) */
+    lambda =  (float) mxGetScalar(prhs[1]); /* regularisation parameter */
+    alpha1 =  1.0f; /* parameter to control the first-order term */
+    alpha0 =  0.5f; /* parameter to control the second-order term */
+    iter =  300; /* Iterations number */
+    L2 =  12.0f; /* Lipschitz constant */
+
+    if (nrhs >= 3)  alpha1 =  (float) mxGetScalar(prhs[2]); /* parameter to control the first-order term */
+    if (nrhs >= 4)  alpha0 =  (float) mxGetScalar(prhs[3]);  /* parameter to control the second-order term */
+    if (nrhs >= 5)  iter =  (int) mxGetScalar(prhs[4]); /* Iterations number */
+    if (nrhs == 6)  L2 =  (float) mxGetScalar(prhs[5]); /* Lipschitz constant */
+
+    /*Handling Matlab output data (mxGetData: the array is single, not double)*/
+    dimX = dim_array[0]; dimY = dim_array[1];
+    Output = (float*)mxGetData(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+    /* running the function */
+    TGV_GPU_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY);
+}
diff --git a/src/Matlab/supp/RMSE.m b/src/Matlab/supp/RMSE.m
new file mode 100644
index 0000000..002f776
--- /dev/null
+++ b/src/Matlab/supp/RMSE.m
@@ -0,0 +1,7 @@
+function err = RMSE(signal1, signal2)
+%RMSE Root mean squared error between two equally sized arrays (scalar result).
+%   Inputs are flattened so 2D/3D data gives one value, not per-column values.
+residual = signal1(:) - signal2(:);           % column-vector residual
+err = sum(residual.^2)/numel(residual);       % MSE over every element
+err = sqrt(err);                              % RMSE
+end
\ No newline at end of file
diff --git a/src/Matlab/supp/my_red_yellowMAP.mat b/src/Matlab/supp/my_red_yellowMAP.mat
new file mode 100644
index 0000000..c2a5b87
Binary files /dev/null and b/src/Matlab/supp/my_red_yellowMAP.mat differ
diff --git a/src/Python/CMakeLists.txt b/src/Python/CMakeLists.txt
new file mode 100644
index 0000000..c2ef855
--- /dev/null
+++ b/src/Python/CMakeLists.txt
@@ -0,0 +1,141 @@
+#   Copyright 2018 Edoardo Pasca
+cmake_minimum_required (VERSION 3.0)
+# Builds the Python wrappers by running a configured setup script (see the custom commands below).
+project(regulariserPython)
+#https://stackoverflow.com/questions/13298504/using-cmake-with-setup-py
+
+# The version number.
+# CIL_VERSION is only read in this file (the set() below is disabled); presumably the parent project or -D provides it.
+#set (CIL_VERSION $ENV{CIL_VERSION} CACHE INTERNAL "Core Imaging Library version" FORCE)
+
+# conda orchestrated build
+message("CIL_VERSION: ${CIL_VERSION}")
+#include (GenerateExportHeader)
+# REQUIRED: configuration stops with an error if no Python interpreter is found.
+find_package(PythonInterp REQUIRED)
+if (PYTHONINTERP_FOUND)
+  message ("Current Python " ${PYTHON_VERSION_STRING} " found " ${PYTHON_EXECUTABLE})
+endif()
+
+
+## Build the regularisers package as a library
+message("Creating Regularisers as shared library")
+
+message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
+# Forces a Release build: any CMAKE_BUILD_TYPE chosen by the user is overwritten.
+set(CMAKE_BUILD_TYPE "Release")
+
+if(WIN32)
+  set (FLAGS "/DWIN32 /EHsc /openmp /DCCPiCore_EXPORTS")
+  set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRT.lib")
+  # No extra link libraries on Windows: EXTRA_LIBRARIES is left without a value.
+  set (EXTRA_LIBRARIES)
+  # LIBRARY_LIB is only printed here; this file never assigns it as a CMake variable.
+  message("library lib: ${LIBRARY_LIB}")
+  
+elseif(UNIX)
+   set (FLAGS "-fopenmp -O2 -funsigned-char -Wall  -Wl,--no-undefined  -DCCPiReconstructionIterative_EXPORTS -std=c++0x")  
+   set (EXTRA_LIBRARIES 
+		"gomp"
+		)
+endif()
+
+# GPU regularisers: SETUP_GPU_WRAPPERS holds Python text for an extra setup() call (gpu_regularisers extension).
+if (BUILD_CUDA)
+    find_package(CUDA)
+    if (CUDA_FOUND)
+      message("CUDA FOUND")
+      set (SETUP_GPU_WRAPPERS "extra_libraries += ['cilregcuda']\n\
+setup( \n\
+        name='ccpi', \n\
+        description='CCPi Core Imaging Library - Image regularisers GPU',\n\
+        version=cil_version,\n\
+        cmdclass = {'build_ext': build_ext},\n\
+        ext_modules = [Extension('ccpi.filters.gpu_regularisers',\n\
+                                  sources=[ \n\
+                                          os.path.join('.' , 'src', 'gpu_regularisers.pyx' ),\n\
+                                            ],\n\
+                                 include_dirs=extra_include_dirs, \n\
+                                 library_dirs=extra_library_dirs, \n\
+                                 extra_compile_args=extra_compile_args, \n\
+                                 libraries=extra_libraries ), \n\
+        ],\n\
+        zip_safe = False,	\n\
+        packages = {'ccpi','ccpi.filters'},\n\
+    )")
+    else()  # CUDA requested but not found: the value becomes a Python comment line
+      message("CUDA NOT FOUND")
+      set(SETUP_GPU_WRAPPERS "#CUDA NOT FOUND")
+    endif()
+endif()  # with BUILD_CUDA off, SETUP_GPU_WRAPPERS is not set by this file
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/setup-regularisers.py.in" "${CMAKE_CURRENT_BINARY_DIR}/setup-regularisers.py")  # presumably substitutes SETUP_GPU_WRAPPERS into the template - TODO confirm against the .in file
+
+
+find_package(PythonInterp)  # repeats the REQUIRED lookup made near the top of this file
+find_package(PythonLibs)  # not REQUIRED: a missing library only skips the status lines below
+if (PYTHONINTERP_FOUND)
+  message(STATUS "Found PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}")
+  message(STATUS "Python version ${PYTHON_VERSION_STRING}")
+endif()
+if (PYTHONLIBS_FOUND)
+  message(STATUS "Found PYTHON_INCLUDE_DIRS=${PYTHON_INCLUDE_DIRS}")
+  message(STATUS "Found PYTHON_LIBRARIES=${PYTHON_LIBRARIES}")
+endif()
+
+if (PYTHONINTERP_FOUND)
+    message("Python found " ${PYTHON_EXECUTABLE})
+    set(SETUP_PY_IN "${CMAKE_CURRENT_SOURCE_DIR}/setup-regularisers.py.in")
+    set(SETUP_PY    "${CMAKE_CURRENT_BINARY_DIR}/setup-regularisers.py")
+    #set(DEPS        "${CMAKE_CURRENT_SOURCE_DIR}/module/__init__.py")
+    set (DEPS       "${CMAKE_BINARY_DIR}/Core/")  # DEPS is not referenced again in this file
+    set(OUTPUT      "${CMAKE_CURRENT_BINARY_DIR}/build/timestamp")
+    # Same template/output pair as the configure_file call made earlier in this file.
+    configure_file(${SETUP_PY_IN} ${SETUP_PY})
+
+    message("Core binary dir " ${CMAKE_BINARY_DIR}/Core/${CMAKE_BUILD_TYPE})
+    # CONDA_BUILD runs the setup script with "install"; the other branches run "build_ext --inplace".
+    if (CONDA_BUILD)
+      add_custom_command(OUTPUT ${OUTPUT}
+                       COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src
+                       COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/ccpi ${CMAKE_CURRENT_BINARY_DIR}/ccpi
+                       COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
+                                                       PREFIX=${CMAKE_SOURCE_DIR}/Core 
+                                                       LIBRARY_INC=${CMAKE_SOURCE_DIR}/Core 
+                                                       LIBRARY_LIB=${CMAKE_BINARY_DIR}/Core
+                                                       ${PYTHON_EXECUTABLE} ${SETUP_PY} install
+                       COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
+                       DEPENDS cilreg)
+
+    else()
+      if (WIN32)  # only difference from the else() branch: LIBRARY_LIB gets the /${CMAKE_BUILD_TYPE} suffix
+        add_custom_command(OUTPUT ${OUTPUT}
+                       COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src
+                       COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/ccpi ${CMAKE_CURRENT_BINARY_DIR}/ccpi
+                       COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
+                                                       PREFIX=${CMAKE_SOURCE_DIR}/Core 
+                                                       LIBRARY_INC=${CMAKE_SOURCE_DIR}/Core 
+                                                       LIBRARY_LIB=${CMAKE_BINARY_DIR}/Core/${CMAKE_BUILD_TYPE}
+                                                       ${PYTHON_EXECUTABLE} ${SETUP_PY} build_ext --inplace
+                       COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
+                       DEPENDS cilreg)
+      else()
+        add_custom_command(OUTPUT ${OUTPUT}
+                       COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src
+                       COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/ccpi ${CMAKE_CURRENT_BINARY_DIR}/ccpi
+                       COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
+                                                       PREFIX=${CMAKE_SOURCE_DIR}/Core 
+                                                       LIBRARY_INC=${CMAKE_SOURCE_DIR}/Core 
+                                                       LIBRARY_LIB=${CMAKE_BINARY_DIR}/Core
+                                                       ${PYTHON_EXECUTABLE} ${SETUP_PY} build_ext --inplace
+                       COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
+                       DEPENDS cilreg)
+      endif()
+      install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ccpi 
+              DESTINATION ${PYTHON_DEST})  # PYTHON_DEST is not set in this file; presumably provided by the parent project
+    endif()
+    # ALL puts PythonWrapper in the default build; its dependency on the timestamp file runs the command above.
+    
+    add_custom_target(PythonWrapper ALL DEPENDS ${OUTPUT})
+
+    #install(CODE "execute_process(COMMAND ${PYTHON} ${SETUP_PY} install)")
+endif()
diff --git a/src/Python/ccpi/__init__.py b/src/Python/ccpi/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/Python/ccpi/filters/__init__.py b/src/Python/ccpi/filters/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/Python/ccpi/filters/regularisers.py b/src/Python/ccpi/filters/regularisers.py
new file mode 100644
index 0000000..588ea32
--- /dev/null
+++ b/src/Python/ccpi/filters/regularisers.py
@@ -0,0 +1,214 @@
+"""
+script which assigns a proper device core function based on a flag ('cpu' or 'gpu')
+"""
+
+from ccpi.filters.cpu_regularisers import TV_ROF_CPU, TV_FGP_CPU, TV_SB_CPU, dTV_FGP_CPU, TNV_CPU, NDF_CPU, Diff4th_CPU, TGV_CPU, LLT_ROF_CPU, PATCHSEL_CPU, NLTV_CPU
+try:
+    from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, TV_SB_GPU, dTV_FGP_GPU, NDF_GPU, Diff4th_GPU, TGV_GPU, LLT_ROF_GPU, PATCHSEL_GPU
+    gpu_enabled = True
+except ImportError:
+    gpu_enabled = False    
+from ccpi.filters.cpu_regularisers import NDF_INPAINT_CPU, NVM_INPAINT_CPU
+
+def ROF_TV(inputData, regularisation_parameter, iterations,
+                     time_marching_parameter,device='cpu'):
+    if device == 'cpu':
+        return TV_ROF_CPU(inputData,
+                     regularisation_parameter,
+                     iterations, 
+                     time_marching_parameter)
+    elif device == 'gpu' and gpu_enabled:
+        return TV_ROF_GPU(inputData,
+                     regularisation_parameter,
+                     iterations, 
+                     time_marching_parameter)
+    else:
+        if not gpu_enabled and device == 'gpu':
+            raise ValueError ('GPU is not available')
+        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+                         .format(device))
+
+def FGP_TV(inputData, regularisation_parameter,iterations,
+                     tolerance_param, methodTV, nonneg, printM, device='cpu'):
+    if device == 'cpu':
+        return TV_FGP_CPU(inputData,
+                     regularisation_parameter,
+                     iterations, 
+                     tolerance_param,
+                     methodTV,
+                     nonneg,
+                     printM)
+    elif device == 'gpu' and gpu_enabled:
+        return TV_FGP_GPU(inputData,
+                     regularisation_parameter,
+                     iterations, 
+                     tolerance_param,
+                     methodTV,
+                     nonneg,
+                     printM)
+    else:
+        if not gpu_enabled and device == 'gpu':
+            raise ValueError ('GPU is not available')
+        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+                         .format(device))
+def SB_TV(inputData, regularisation_parameter, iterations,
+                     tolerance_param, methodTV, printM, device='cpu'):
+    if device == 'cpu':
+        return TV_SB_CPU(inputData,
+                     regularisation_parameter,
+                     iterations, 
+                     tolerance_param,
+                     methodTV,
+                     printM)
+    elif device == 'gpu' and gpu_enabled:
+        return TV_SB_GPU(inputData,
+                     regularisation_parameter,
+                     iterations, 
+                     tolerance_param,
+                     methodTV,
+                     printM)
+    else:
+        if not gpu_enabled and device == 'gpu':
+            raise ValueError ('GPU is not available')
+        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+                         .format(device))
+def FGP_dTV(inputData, refdata, regularisation_parameter, iterations,
+                     tolerance_param, eta_const, methodTV, nonneg, printM, device='cpu'):
+    if device == 'cpu':
+        return dTV_FGP_CPU(inputData,
+                     refdata,
+                     regularisation_parameter,
+                     iterations, 
+                     tolerance_param,
+                     eta_const,
+                     methodTV,
+                     nonneg,
+                     printM)
+    elif device == 'gpu' and gpu_enabled:
+        return dTV_FGP_GPU(inputData,
+                     refdata,
+                     regularisation_parameter,
+                     iterations, 
+                     tolerance_param,
+                     eta_const,
+                     methodTV,
+                     nonneg,
+                     printM)
+    else:
+        if not gpu_enabled and device == 'gpu':
+            raise ValueError ('GPU is not available')
+        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+                         .format(device))
+def TNV(inputData, regularisation_parameter, iterations, tolerance_param):
+        return TNV_CPU(inputData,
+                     regularisation_parameter,
+                     iterations, 
+                     tolerance_param)
+def NDF(inputData, regularisation_parameter, edge_parameter, iterations,
+                     time_marching_parameter, penalty_type, device='cpu'):
+    if device == 'cpu':
+        return NDF_CPU(inputData,
+                     regularisation_parameter,
+                     edge_parameter,
+                     iterations, 
+                     time_marching_parameter,
+                     penalty_type)
+    elif device == 'gpu' and gpu_enabled:
+        return NDF_GPU(inputData,
+                     regularisation_parameter,
+                     edge_parameter,
+                     iterations, 
+                     time_marching_parameter,
+                     penalty_type)
+    else:
+        if not gpu_enabled and device == 'gpu':
+    	    raise ValueError ('GPU is not available')
+        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+                         .format(device))
+def Diff4th(inputData, regularisation_parameter, edge_parameter, iterations,
+                     time_marching_parameter, device='cpu'):
+    if device == 'cpu':
+        return Diff4th_CPU(inputData,
+                     regularisation_parameter,
+                     edge_parameter,
+                     iterations, 
+                     time_marching_parameter)
+    elif device == 'gpu' and gpu_enabled:
+        return Diff4th_GPU(inputData,
+                     regularisation_parameter,
+                     edge_parameter,
+                     iterations, 
+                     time_marching_parameter)
+    else:
+        if not gpu_enabled and device == 'gpu':
+            raise ValueError ('GPU is not available')
+        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+                         .format(device))
+        
+def PatchSelect(inputData, searchwindow, patchwindow, neighbours, edge_parameter, device='cpu'):
+    if device == 'cpu':
+        return PATCHSEL_CPU(inputData,
+                     searchwindow,
+                     patchwindow,
+                     neighbours, 
+                     edge_parameter)
+    elif device == 'gpu' and gpu_enabled:
+        return PATCHSEL_GPU(inputData,
+                     searchwindow,
+                     patchwindow,
+                     neighbours, 
+                     edge_parameter)
+    else:
+        if not gpu_enabled and device == 'gpu':
+            raise ValueError ('GPU is not available')
+        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+                         .format(device))
+
+def NLTV(inputData, H_i, H_j, H_k, Weights, regularisation_parameter, iterations):
+    return NLTV_CPU(inputData,
+                     H_i,
+                     H_j,
+                     H_k, 
+                     Weights,
+                     regularisation_parameter,
+                     iterations)
+
+def TGV(inputData, regularisation_parameter, alpha1, alpha0, iterations,
+                     LipshitzConst, device='cpu'):
+    if device == 'cpu':
+        return TGV_CPU(inputData, 
+					regularisation_parameter, 
+					alpha1, 
+					alpha0, 
+					iterations,
+                    LipshitzConst)
+    elif device == 'gpu' and gpu_enabled:
+        return TGV_GPU(inputData, 
+					regularisation_parameter, 
+					alpha1, 
+					alpha0, 
+					iterations,
+                    LipshitzConst)
+    else:
+        if not gpu_enabled and device == 'gpu':
+            raise ValueError ('GPU is not available')
+        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+                         .format(device))
+def LLT_ROF(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations,
+                     time_marching_parameter, device='cpu'):
+    if device == 'cpu':
+        return LLT_ROF_CPU(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
+    elif device == 'gpu' and gpu_enabled:
+        return LLT_ROF_GPU(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
+    else:
+        if not gpu_enabled and device == 'gpu':
+            raise ValueError ('GPU is not available')
+        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+                         .format(device))
+def NDF_INP(inputData, maskData, regularisation_parameter, edge_parameter, iterations,
+                     time_marching_parameter, penalty_type):
+        return NDF_INPAINT_CPU(inputData, maskData, regularisation_parameter, 
+        edge_parameter, iterations, time_marching_parameter, penalty_type)
+        
+def NVM_INP(inputData, maskData, SW_increment, iterations):
+        return NVM_INPAINT_CPU(inputData, maskData, SW_increment, iterations)  # straight pass-through to the CPU wrapper; no device switch
diff --git a/src/Python/setup-regularisers.py.in b/src/Python/setup-regularisers.py.in
new file mode 100644
index 0000000..462edda
--- /dev/null
+++ b/src/Python/setup-regularisers.py.in
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+
+import setuptools
+from distutils.core import setup
+from distutils.extension import Extension
+from Cython.Distutils import build_ext
+
+import os
+import sys
+import numpy
+import platform	
+
+cil_version = os.environ.get('CIL_VERSION', '')  # .get(): an unset variable must reach the message below instead of raising KeyError
+if not cil_version:
+    print("Please set the environmental variable CIL_VERSION")
+    sys.exit(1)
+	
+library_include_path = ""
+library_lib_path = ""
+try:
+    library_include_path = os.environ['LIBRARY_INC']
+    library_lib_path = os.environ['LIBRARY_LIB']
+except KeyError:
+    # LIBRARY_INC or LIBRARY_LIB is unset: headers come from PREFIX; library_lib_path stays '' (NOTE(review): intended?).
+    library_include_path = os.environ['PREFIX']+'/include'
+    
+# Compiler and linker inputs for the extension declared further down.
+extra_include_dirs = [numpy.get_include(), library_include_path]
+#extra_library_dirs = [os.path.join(library_include_path, "..", "lib")]
+extra_library_dirs = [library_lib_path]
+extra_libraries = ['cilreg']
+extra_compile_args = []
+extra_link_args = []
+
+print ("extra_library_dirs " , extra_library_dirs)
+
+# Header search path: the Core tree with its CPU, inpainter and GPU
+# sub-folders (all under "../../Core"), followed by the current directory.
+_core_root = os.path.join(".." , ".." , "Core")
+_gpu_folders = ["TV_FGP", "TV_ROF", "TV_SB", "TGV", "LLTROF", "NDF",
+                "dTV_FGP", "DIFF4th", "PatchSelect"]
+extra_include_dirs.append(_core_root)
+extra_include_dirs.extend(
+    os.path.join(_core_root, _cpu_folder)
+    for _cpu_folder in ("regularisers_CPU", "inpainters_CPU"))
+extra_include_dirs.extend(
+    os.path.join(_core_root, "regularisers_GPU", _gpu_folder)
+    for _gpu_folder in _gpu_folders)
+extra_include_dirs.append(".")
+
+if platform.system() != 'Windows':
+    extra_compile_args = ['-fopenmp','-O2', '-funsigned-char', '-Wall', '-std=c++0x']
+    extra_libraries += [@EXTRA_OMP_LIB@]
+else:  # Windows build switches
+    extra_compile_args = ['/DWIN32','/EHsc','/DBOOST_ALL_NO_LIB' , '/openmp' ]
+    
+setup(
+    name='ccpi',
+    description='CCPi Core Imaging Library - Image regularisers',
+    version=cil_version,
+    cmdclass = {'build_ext': build_ext},
+    # Single extension: the Cython CPU wrappers, linked against extra_libraries.
+    ext_modules = [Extension("ccpi.filters.cpu_regularisers",
+                             sources=[os.path.join("." , "src", "cpu_regularisers.pyx" ) ],
+                             include_dirs=extra_include_dirs,
+                             library_dirs=extra_library_dirs,
+                             extra_compile_args=extra_compile_args,
+                             libraries=extra_libraries ),
+    ],
+    zip_safe = False,
+    packages = ['ccpi','ccpi.filters'],  # a list, as documented for `packages`; was an unordered set literal
+)
+
+
+@SETUP_GPU_WRAPPERS@
diff --git a/src/Python/src/cpu_regularisers.pyx b/src/Python/src/cpu_regularisers.pyx
new file mode 100644
index 0000000..11a0617
--- /dev/null
+++ b/src/Python/src/cpu_regularisers.pyx
@@ -0,0 +1,685 @@
+# distutils: language=c++
+"""
+Copyright 2018 CCPi
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: Edoardo Pasca, Daniil Kazantsev
+"""
+
+import cython
+import numpy as np
+cimport numpy as np
+# External C functions, declared here so that the Python-level wrappers in this module can call them.
+cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
+cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+cdef extern float SB_TV_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
+cdef extern float LLT_ROF_CPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
+cdef extern float TGV_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
+cdef extern float Diffusion_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ);
+cdef extern float Diffus4th_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
+cdef extern float TNV_CPU_main(float *Input, float *u, float lambdaPar, int maxIter, float tol, int dimX, int dimY, int dimZ);
+cdef extern float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+cdef extern float PatchSelect_CPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM);
+cdef extern float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb);
+# Inpainting entry points (these take an unsigned char mask) and the TV energy helpers.
+cdef extern float Diffusion_Inpaint_CPU_main(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ);
+cdef extern float NonlocalMarching_Inpaint_main(float *Input, unsigned char *M, float *Output, unsigned char *M_upd, int SW_increment, int iterationsNumb, int trigger, int dimX, int dimY, int dimZ);
+cdef extern float TV_energy2D(float *U, float *U0, float *E_val, float lambdaPar, int type, int dimX, int dimY);
+cdef extern float TV_energy3D(float *U, float *U0, float *E_val, float lambdaPar, int type, int dimX, int dimY, int dimZ);
+#****************************************************************#
+#********************** Total-variation ROF *********************#
+#****************************************************************#
+def TV_ROF_CPU(inputData, regularisation_parameter, iterationsNumb, marching_step_parameter):
+    # ROF-TV entry point: 2D input goes to TV_ROF_2D, 3D input to TV_ROF_3D; any other rank raises ValueError.
+    if inputData.ndim not in (2, 3):  # used to fall through and return None
+        raise ValueError('Input data must be 2D or 3D, got {0}D'.format(inputData.ndim))
+    return (TV_ROF_2D if inputData.ndim == 2 else TV_ROF_3D)(inputData, regularisation_parameter, iterationsNumb, marching_step_parameter)
+
+def TV_ROF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+              float regularisation_parameter,
+              int iterationsNumb,
+              float marching_step_parameter):
+    """Run TV_ROF_CPU_main on a 2D float32 C-contiguous array; returns a new array of the same shape."""
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros((n0, n1), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ): the array axes in reverse
+    # order, with dimZ = 1 for 2D data.
+    TV_ROF_CPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter,
+                    iterationsNumb, marching_step_parameter, n1, n0, 1)
+    return outputData
+            
+def TV_ROF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+              float regularisation_parameter,
+              int iterationsNumb,
+              float marching_step_parameter):
+    """Run TV_ROF_CPU_main on a 3D float32 C-contiguous array; returns a new array of the same shape."""
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros((n0, n1, n2), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ) = (shape[2], shape[1], shape[0]).
+    TV_ROF_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter,
+                    iterationsNumb, marching_step_parameter, n2, n1, n0)
+
+    return outputData
+
+#****************************************************************#
+#********************** Total-variation FGP *********************#
+#****************************************************************#
+#******** Total-variation Fast-Gradient-Projection (FGP)*********#
+def TV_FGP_CPU(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM):
+    # FGP-TV entry point: 2D input goes to TV_FGP_2D, 3D input to TV_FGP_3D; any other rank raises ValueError.
+    if inputData.ndim not in (2, 3):  # used to fall through and return None
+        raise ValueError('Input data must be 2D or 3D, got {0}D'.format(inputData.ndim))
+    return (TV_FGP_2D if inputData.ndim == 2 else TV_FGP_3D)(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM)
+
+def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+              float regularisation_parameter,
+              int iterationsNumb,
+              float tolerance_param,
+              int methodTV,
+              int nonneg,
+              int printM):
+    """FGP-TV for a 2D float32 C-contiguous array.
+
+    All scalar arguments are forwarded unchanged to TV_FGP_CPU_main; the
+    result is a newly allocated array with the shape of inputData.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros((n0, n1), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ): the array axes in reverse
+    # order, with dimZ = 1 for 2D data.
+    TV_FGP_CPU_main(&inputData[0,0], &outputData[0,0],
+                    regularisation_parameter, iterationsNumb,
+                    tolerance_param, methodTV, nonneg, printM,
+                    n1, n0, 1)
+
+    return outputData
+            
+def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+              float regularisation_parameter,
+              int iterationsNumb,
+              float tolerance_param,
+              int methodTV,
+              int nonneg,
+              int printM):
+    """FGP-TV for a 3D float32 C-contiguous array.
+
+    All scalar arguments are forwarded unchanged to TV_FGP_CPU_main; the
+    result is a newly allocated array with the shape of inputData.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros((n0, n1, n2), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ) = (shape[2], shape[1], shape[0]).
+    TV_FGP_CPU_main(&inputData[0,0,0], &outputData[0,0,0],
+                    regularisation_parameter, iterationsNumb,
+                    tolerance_param, methodTV, nonneg, printM,
+                    n2, n1, n0)
+    return outputData
+
+#***************************************************************#
+#********************** Total-variation SB *********************#
+#***************************************************************#
+#*************** Total-variation Split Bregman (SB)*************#
+def TV_SB_CPU(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM):
+    # SB-TV entry point: 2D input goes to TV_SB_2D, 3D input to TV_SB_3D; any other rank raises ValueError.
+    if inputData.ndim not in (2, 3):  # used to fall through and return None
+        raise ValueError('Input data must be 2D or 3D, got {0}D'.format(inputData.ndim))
+    return (TV_SB_2D if inputData.ndim == 2 else TV_SB_3D)(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM)
+
+def TV_SB_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+             float regularisation_parameter,
+             int iterationsNumb,
+             float tolerance_param,
+             int methodTV,
+             int printM):
+    """Split Bregman TV for a 2D float32 C-contiguous array.
+
+    All scalar arguments are forwarded unchanged to SB_TV_CPU_main; the
+    result is a newly allocated array with the shape of inputData.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros((n0, n1), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ) = (shape[1], shape[0], 1) for 2D data.
+    SB_TV_CPU_main(&inputData[0,0], &outputData[0,0],
+                   regularisation_parameter, iterationsNumb,
+                   tolerance_param, methodTV, printM,
+                   n1, n0, 1)
+
+    return outputData
+            
+def TV_SB_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+             float regularisation_parameter,
+             int iterationsNumb,
+             float tolerance_param,
+             int methodTV,
+             int printM):
+    """Split Bregman TV for a 3D float32 C-contiguous array.
+
+    All scalar arguments are forwarded unchanged to SB_TV_CPU_main; the
+    result is a newly allocated array with the shape of inputData.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros((n0, n1, n2), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ) = (shape[2], shape[1], shape[0]).
+    SB_TV_CPU_main(&inputData[0,0,0], &outputData[0,0,0],
+                   regularisation_parameter, iterationsNumb,
+                   tolerance_param, methodTV, printM, n2, n1, n0)
+    return outputData
+
+#***************************************************************#
+#***************** Total Generalised Variation *****************#
+#***************************************************************#
+def TGV_CPU(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst):
+    # TGV entry point: 2D input goes to TGV_2D, 3D input to TGV_3D; any other
+    # rank raises ValueError (it used to fall through and return None).
+    if inputData.ndim not in (2, 3):
+        raise ValueError('Input data must be 2D or 3D, got {0}D'.format(inputData.ndim))
+    solver = TGV_2D if inputData.ndim == 2 else TGV_3D
+    return solver(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst)
+
+def TGV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+           float regularisation_parameter,
+           float alpha1,
+           float alpha0,
+           int iterationsNumb,
+           float LipshitzConst):
+    """Total Generalised Variation for a 2D float32 C-contiguous array.
+
+    All scalar arguments are forwarded unchanged to TGV_main; the result
+    is a newly allocated array with the shape of inputData.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros((n0, n1), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ) = (shape[1], shape[0], 1) for 2D data.
+    TGV_main(&inputData[0,0], &outputData[0,0],
+             regularisation_parameter, alpha1, alpha0,
+             iterationsNumb, LipshitzConst,
+             n1, n0, 1)
+    return outputData
+def TGV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+           float regularisation_parameter,
+           float alpha1,
+           float alpha0,
+           int iterationsNumb,
+           float LipshitzConst):
+    """Total Generalised Variation for a 3D float32 C-contiguous array.
+
+    All scalar arguments are forwarded unchanged to TGV_main; the result
+    is a newly allocated array with the shape of inputData.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros((n0, n1, n2), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ) = (shape[2], shape[1], shape[0]).
+    TGV_main(&inputData[0,0,0], &outputData[0,0,0],
+             regularisation_parameter, alpha1, alpha0,
+             iterationsNumb, LipshitzConst,
+             n2, n1, n0)
+    return outputData
+
+#***************************************************************#
+#******************* ROF - LLT regularisation ******************#
+#***************************************************************#
+def LLT_ROF_CPU(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter):
+    # ROF-LLT entry point: 2D input goes to LLT_ROF_2D, 3D input to LLT_ROF_3D; any other rank raises ValueError.
+    if inputData.ndim not in (2, 3):  # used to fall through and return None
+        raise ValueError('Input data must be 2D or 3D, got {0}D'.format(inputData.ndim))
+    return (LLT_ROF_2D if inputData.ndim == 2 else LLT_ROF_3D)(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
+
+def LLT_ROF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+               float regularisation_parameterROF,
+               float regularisation_parameterLLT,
+               int iterations,
+               float time_marching_parameter):
+    """Run LLT_ROF_CPU_main on a 2D float32 C-contiguous array; returns a new array of the same shape."""
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros((n0, n1), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ) = (shape[1], shape[0], 1) for 2D data.
+    LLT_ROF_CPU_main(&inputData[0,0], &outputData[0,0],
+                     regularisation_parameterROF, regularisation_parameterLLT,
+                     iterations, time_marching_parameter, n1, n0, 1)
+    return outputData
+
+def LLT_ROF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+               float regularisation_parameterROF,
+               float regularisation_parameterLLT,
+               int iterations,
+               float time_marching_parameter):
+    """Run LLT_ROF_CPU_main on a 3D float32 C-contiguous array; returns a new array of the same shape."""
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros((n0, n1, n2), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ) = (shape[2], shape[1], shape[0]).
+    LLT_ROF_CPU_main(&inputData[0,0,0], &outputData[0,0,0],
+                     regularisation_parameterROF, regularisation_parameterLLT,
+                     iterations, time_marching_parameter, n2, n1, n0)
+    return outputData
+
+#****************************************************************#
+#**************Directional Total-variation FGP ******************#
+#****************************************************************#
+#******** Directional TV Fast-Gradient-Projection (FGP)*********#
+def dTV_FGP_CPU(inputData, refdata, regularisation_parameter, iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM):
+    # Directional FGP-TV entry point: 2D input goes to dTV_FGP_2D, 3D input to dTV_FGP_3D; any other rank raises ValueError.
+    if inputData.ndim not in (2, 3):  # used to fall through and return None
+        raise ValueError('Input data must be 2D or 3D, got {0}D'.format(inputData.ndim))
+    return (dTV_FGP_2D if inputData.ndim == 2 else dTV_FGP_3D)(inputData, refdata, regularisation_parameter, iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM)
+
+def dTV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+               np.ndarray[np.float32_t, ndim=2, mode="c"] refdata,
+               float regularisation_parameter,
+               int iterationsNumb,
+               float tolerance_param,
+               float eta_const,
+               int methodTV,
+               int nonneg,
+               int printM):
+    """FGP directional TV for a 2D float32 C-contiguous array and a reference array.
+
+    Scalar arguments are forwarded unchanged to dTV_FGP_CPU_main and a new
+    array shaped like inputData is returned.  Raises ValueError when refdata
+    does not have the shape of inputData.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    if refdata.shape[0] != n0 or refdata.shape[1] != n1:  # C side receives refdata as a raw pointer plus inputData's extents
+        raise ValueError('refdata must have the same shape as inputData')
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros((n0, n1), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ) = (shape[1], shape[0], 1) for 2D data.
+    dTV_FGP_CPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0],
+                     regularisation_parameter, iterationsNumb,
+                     tolerance_param, eta_const, methodTV, nonneg, printM,
+                     n1, n0, 1)
+    return outputData
+            
+def dTV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+               np.ndarray[np.float32_t, ndim=3, mode="c"] refdata,
+               float regularisation_parameter,
+               int iterationsNumb,
+               float tolerance_param,
+               float eta_const,
+               int methodTV,
+               int nonneg,
+               int printM):
+    """FGP directional TV for a 3D float32 C-contiguous array and a reference array.
+
+    Scalar arguments are forwarded unchanged to dTV_FGP_CPU_main and a new
+    array shaped like inputData is returned.  Raises ValueError when refdata
+    does not have the shape of inputData.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+    # The C side receives refdata as a raw pointer plus inputData's extents, so the shapes must agree.
+    if refdata.shape[0] != n0 or refdata.shape[1] != n1 or refdata.shape[2] != n2:
+        raise ValueError('refdata must have the same shape as inputData')
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros((n0, n1, n2), dtype=np.float32)
+    # The C routine takes (dimX, dimY, dimZ) = (shape[2], shape[1], shape[0]).
+    dTV_FGP_CPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0], regularisation_parameter,
+                     iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM, n2, n1, n0)
+    return outputData
+    
+#****************************************************************#
+#*********************Total Nuclear Variation********************#
+#****************************************************************#
+def TNV_CPU(inputData, regularisation_parameter, iterationsNumb, tolerance_param):
+    # Only 3D input is processed; every other rank, 2D included, yields None.
+    if inputData.ndim != 3:
+        return None
+    return TNV_3D(inputData, regularisation_parameter, iterationsNumb, tolerance_param)
+
+def TNV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+           float regularisation_parameter,
+           int iterationsNumb,
+           float tolerance_param):
+    """Run TNV_CPU_main on a 3D float32 C-contiguous array; returns a new array of the same shape."""
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros((n0, n1, n2), dtype=np.float32)
+
+    # Run TNV iterations for 3D (X,Y,Channels) data; extents are passed as (shape[2], shape[1], shape[0]).
+    TNV_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter,
+                 iterationsNumb, tolerance_param, n2, n1, n0)
+    return outputData
+#****************************************************************#
+#***************Nonlinear (Isotropic) Diffusion******************#
+#****************************************************************#
+def NDF_CPU(inputData, regularisation_parameter, edge_parameter, iterationsNumb,time_marching_parameter, penalty_type):
+    # Nonlinear diffusion entry point: 2D input goes to NDF_2D, 3D input to NDF_3D; any other rank raises ValueError.
+    if inputData.ndim not in (2, 3):  # used to fall through and return None
+        raise ValueError('Input data must be 2D or 3D, got {0}D'.format(inputData.ndim))
+    return (NDF_2D if inputData.ndim == 2 else NDF_3D)(inputData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type)
+
+def NDF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+           float regularisation_parameter,
+           float edge_parameter,
+           int iterationsNumb,
+           float time_marching_parameter,
+           int penalty_type):
+    """Run Diffusion_CPU_main on a 2D float32 C-contiguous array; returns a new array of the same shape."""
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros((n0, n1), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ) = (shape[1], shape[0], 1) for 2D data.
+    Diffusion_CPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter,
+                       iterationsNumb, time_marching_parameter, penalty_type, n1, n0, 1)
+    return outputData
+            
+def NDF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+           float regularisation_parameter,
+           float edge_parameter,
+           int iterationsNumb,
+           float time_marching_parameter,
+           int penalty_type):
+    """Run Diffusion_CPU_main on a 3D float32 C-contiguous array; returns a new array of the same shape."""
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros((n0, n1, n2), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ) = (shape[2], shape[1], shape[0]).
+    Diffusion_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter,
+                       iterationsNumb, time_marching_parameter, penalty_type, n2, n1, n0)
+
+    return outputData
+
+#****************************************************************#
+#*************Anisotropic Fourth-Order diffusion*****************#
+#****************************************************************#
+def Diff4th_CPU(inputData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter):
+    # Fourth-order diffusion entry point: 2D input goes to Diff4th_2D, 3D input to Diff4th_3D; any other rank raises ValueError.
+    if inputData.ndim not in (2, 3):  # used to fall through and return None
+        raise ValueError('Input data must be 2D or 3D, got {0}D'.format(inputData.ndim))
+    return (Diff4th_2D if inputData.ndim == 2 else Diff4th_3D)(inputData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter)
+
+def Diff4th_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+               float regularisation_parameter,
+               float edge_parameter,
+               int iterationsNumb,
+               float time_marching_parameter):
+    """Run Diffus4th_CPU_main on a 2D float32 C-contiguous array; returns a new array of the same shape."""
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros((n0, n1), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ) = (shape[1], shape[0], 1) for 2D data.
+    Diffus4th_CPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter,
+                       iterationsNumb, time_marching_parameter, n1, n0, 1)
+    return outputData
+          
+def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+               float regularisation_parameter,
+               float edge_parameter,
+               int iterationsNumb,
+               float time_marching_parameter):
+    """Run Diffus4th_CPU_main on a 3D float32 C-contiguous array; returns a new array of the same shape."""
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros((n0, n1, n2), dtype=np.float32)
+
+    # The C routine takes (dimX, dimY, dimZ) = (shape[2], shape[1], shape[0]).
+    Diffus4th_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter,
+                       iterationsNumb, time_marching_parameter, n2, n1, n0)
+
+    return outputData
+
+#****************************************************************#
+#***************Patch-based weights calculation******************#
+#****************************************************************#
+def PATCHSEL_CPU(inputData, searchwindow, patchwindow, neighbours, edge_parameter):
+    # 2D input is processed; 3D input yields the placeholder value 1; other ranks yield None.
+    if inputData.ndim == 2:
+        return PatchSel_2D(inputData, searchwindow, patchwindow, neighbours, edge_parameter)
+    return 1 if inputData.ndim == 3 else None
+def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+                     int searchwindow,
+                     int patchwindow,
+                     int neighbours,
+                     float edge_parameter):
+    cdef long dims[3]
+    dims[0] = neighbours  # leading axis of every output array
+    dims[1] = inputData.shape[0]
+    dims[2] = inputData.shape[1]
+    # Calls PatchSelect_CPU_main and returns (H_i, H_j, Weights).
+    # Each of the three has shape (neighbours, inputData.shape[0], inputData.shape[1]).
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] Weights = \
+            np.zeros([dims[0], dims[1],dims[2]], dtype='float32')
+    # H_i and H_j are uint16 arrays, zero-initialised and handed to the C routine by pointer.
+    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_i = \
+            np.zeros([dims[0], dims[1],dims[2]], dtype='uint16')
+    # NOTE(review): in the call below H_j sits in the H_i slot and H_i in both the H_j and H_k slots,
+    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_j = \
+            np.zeros([dims[0], dims[1],dims[2]], dtype='uint16')
+    # with dimZ = 0 and switchM = 1 -- presumably deliberate for 2D data; confirm against the C source.
+    # Run patch-based weight selection function
+    PatchSelect_CPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], 0, searchwindow, patchwindow,  neighbours,  edge_parameter, 1)
+    return H_i, H_j, Weights
+"""
+def PatchSel_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+                     int searchwindow,
+                     int patchwindow,
+                     int neighbours,
+                     float edge_parameter):
+    cdef long dims[4]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+    dims[3] = neighbours
+    
+    cdef np.ndarray[np.float32_t, ndim=4, mode="c"] Weights = \
+            np.zeros([dims[3],dims[0],dims[1],dims[2]], dtype='float32')
+    
+    cdef np.ndarray[np.uint16_t, ndim=4, mode="c"] H_i = \
+            np.zeros([dims[3],dims[0],dims[1],dims[2]], dtype='uint16')
+            
+    cdef np.ndarray[np.uint16_t, ndim=4, mode="c"] H_j = \
+            np.zeros([dims[3],dims[0],dims[1],dims[2]], dtype='uint16')
+            
+    cdef np.ndarray[np.uint16_t, ndim=4, mode="c"] H_k = \
+            np.zeros([dims[3],dims[0],dims[1],dims[2]], dtype='uint16')
+
+    # Run patch-based weight selection function
+    PatchSelect_CPU_main(&inputData[0,0,0], &H_i[0,0,0,0], &H_j[0,0,0,0], &H_k[0,0,0,0], &Weights[0,0,0,0], dims[2], dims[1], dims[0], searchwindow, patchwindow,  neighbours, edge_parameter, 1)
+    return H_i, H_j, H_k, Weights
+"""
+
+#****************************************************************#
+#***************Non-local Total Variation******************#
+#****************************************************************#
+def NLTV_CPU(inputData, H_i, H_j, H_k, Weights, regularisation_parameter, iterations):
+    # H_k is accepted but not used: only 2D input is processed, 3D yields the placeholder 1, other ranks None.
+    if inputData.ndim == 2:
+        return NLTV_2D(inputData, H_i, H_j, Weights, regularisation_parameter, iterations)
+    return 1 if inputData.ndim == 3 else None
+def NLTV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+                     np.ndarray[np.uint16_t, ndim=3, mode="c"] H_i,
+                     np.ndarray[np.uint16_t, ndim=3, mode="c"] H_j,
+                     np.ndarray[np.float32_t, ndim=3, mode="c"] Weights,
+                     float regularisation_parameter,
+                     int iterations):
+    """Run Nonlocal_TV_CPU_main on a 2D array; ValueError unless H_i, H_j and Weights are all (neighbours, rows, cols)."""
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef int neighbours = H_i.shape[0]
+    if (H_i.shape[1] != n0 or H_i.shape[2] != n1  # the C side gets raw pointers plus these extents only
+            or H_j.shape[0] != neighbours or H_j.shape[1] != n0 or H_j.shape[2] != n1
+            or Weights.shape[0] != neighbours or Weights.shape[1] != n0 or Weights.shape[2] != n1):
+        raise ValueError('H_i, H_j and Weights must all have shape (neighbours, {0}, {1})'.format(n0, n1))
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros((n0, n1), dtype=np.float32)
+    Nonlocal_TV_CPU_main(&inputData[0,0], &outputData[0,0], &H_i[0,0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], n1, n0, 0, neighbours, regularisation_parameter, iterations)  # H_i reused in the H_k slot, dimZ = 0
+    return outputData
+
+#*********************Inpainting WITH****************************#
+#***************Nonlinear (Isotropic) Diffusion******************#
+#****************************************************************#
+def NDF_INPAINT_CPU(inputData, maskData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type):
+    # Diffusion inpainting entry point: 2D input goes to NDF_INP_2D, 3D input to NDF_INP_3D; any other rank raises ValueError.
+    if inputData.ndim not in (2, 3):  # used to fall through and return None
+        raise ValueError('Input data must be 2D or 3D, got {0}D'.format(inputData.ndim))
+    return (NDF_INP_2D if inputData.ndim == 2 else NDF_INP_3D)(inputData, maskData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type)
+
+def NDF_INP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+               np.ndarray[np.uint8_t, ndim=2, mode="c"] maskData,
+               float regularisation_parameter,
+               float edge_parameter,
+               int iterationsNumb,
+               float time_marching_parameter,
+               int penalty_type):
+    """Run Diffusion_Inpaint_CPU_main on a 2D array and its uint8 mask; ValueError when the mask shape differs."""
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    if maskData.shape[0] != n0 or maskData.shape[1] != n1:  # C side receives the mask as a raw pointer plus inputData's extents
+        raise ValueError('maskData must have the same shape as inputData')
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros((n0, n1), dtype=np.float32)
+
+    # Run Inpainting by Diffusion iterations for 2D data; extents are passed as (shape[1], shape[0], 1).
+    Diffusion_Inpaint_CPU_main(&inputData[0,0], &maskData[0,0], &outputData[0,0], regularisation_parameter,
+                               edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, n1, n0, 1)
+    return outputData
+            
+def NDF_INP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
+                     np.ndarray[np.uint8_t, ndim=3, mode="c"] maskData,
+                     float regularisation_parameter,
+                     float edge_parameter,
+                     int iterationsNumb,
+                     float time_marching_parameter,
+                     int penalty_type):
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+    # 3D variant of the diffusion inpainting wrapper: the output is zero-filled float32 with the input's shape.
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+    # Run inpainting by diffusion iterations for 3D data; sizes are passed in reverse axis order
+    # (shape[2], shape[1], shape[0]).
+    Diffusion_Inpaint_CPU_main(&inputData[0,0,0], &maskData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[2], dims[1], dims[0])
+
+    return outputData
+#*********************Inpainting WITH****************************#
+#***************Nonlocal Vertical Marching method****************#
+#****************************************************************#
+def NVM_INPAINT_CPU(inputData, maskData, SW_increment, iterationsNumb):
+    """Inpaint 2D data with NVM_INP_2D; 3D input (like any other rank) is not processed and yields None."""
+    if inputData.ndim != 2:
+        return None
+    return NVM_INP_2D(inputData, maskData, SW_increment, iterationsNumb)
+
+def NVM_INP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+               np.ndarray[np.uint8_t, ndim=2, mode="c"] maskData,
+                     int SW_increment,
+                     int iterationsNumb):
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    # Wrapper around NonlocalMarching_Inpaint_main for 2D data; returns the tuple (outputData, maskData_upd).
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1]], dtype='float32')   
+    # Second output buffer: uint8, same shape (presumably receives the updated mask, judging by its name).
+    cdef np.ndarray[np.uint8_t, ndim=2, mode="c"] maskData_upd = \
+            np.zeros([dims[0],dims[1]], dtype='uint8')
+    # Run inpainting by the nonlocal vertical marching method for 2D data. The constant 1 after iterationsNumb
+    # and the trailing 1 are fixed here; their meaning is set by the C prototype, which is not visible here.
+    NonlocalMarching_Inpaint_main(&inputData[0,0], &maskData[0,0], &outputData[0,0], 
+                                  &maskData_upd[0,0],
+                                  SW_increment, iterationsNumb, 1, dims[1], dims[0], 1)
+    
+    return (outputData, maskData_upd)
+
+
+#****************************************************************#
+#***************Calculation of TV-energy functional**************#
+#****************************************************************#
+def TV_ENERGY(inputData, inputData0, regularisation_parameter, typeFunctional):
+    """TV-energy functional: TV_ENERGY_2D for 2D input, TV_ENERGY_3D for 3D input, None for any other rank."""
+    rank = inputData.ndim
+    energy_fn = TV_ENERGY_2D if rank == 2 else TV_ENERGY_3D if rank == 3 else None
+    return None if energy_fn is None else energy_fn(inputData, inputData0, regularisation_parameter, typeFunctional)
+
+def TV_ENERGY_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+                 np.ndarray[np.float32_t, ndim=2, mode="c"] inputData0, 
+                     float regularisation_parameter,
+                     int typeFunctional):
+    """Call TV_energy2D on two 2D float32 arrays and return its result in a one-element float32 array."""
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    # A length-1 float32 array is the output slot; its address is what the C routine receives.
+    cdef np.ndarray[np.float32_t, ndim=1, mode="c"] outputData = \
+            np.zeros([1], dtype='float32')
+                   
+    # run function; sizes are passed as shape[1], shape[0]
+    TV_energy2D(&inputData[0,0], &inputData0[0,0], &outputData[0], regularisation_parameter, typeFunctional, dims[1], dims[0])
+    
+    return outputData
+            
+def TV_ENERGY_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+                 np.ndarray[np.float32_t, ndim=3, mode="c"] inputData0, 
+                     float regularisation_parameter,
+                     int typeFunctional):
+    """Call TV_energy3D on two 3D float32 arrays and return its result in a one-element float32 array."""
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+    # A length-1 float32 array is the output slot; its address is what the C routine receives.
+    cdef np.ndarray[np.float32_t, ndim=1, mode="c"] outputData = \
+            np.zeros([1], dtype='float32')
+           
+    # Run function; sizes are passed in reverse axis order (shape[2], shape[1], shape[0])
+    TV_energy3D(&inputData[0,0,0], &inputData0[0,0,0], &outputData[0], regularisation_parameter, typeFunctional, dims[2], dims[1], dims[0])
+
+    return outputData
diff --git a/src/Python/src/gpu_regularisers.pyx b/src/Python/src/gpu_regularisers.pyx
new file mode 100644
index 0000000..b52f669
--- /dev/null
+++ b/src/Python/src/gpu_regularisers.pyx
@@ -0,0 +1,640 @@
+# distutils: language=c++
+"""
+Copyright 2018 CCPi
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: Edoardo Pasca, Daniil Kazantsev
+"""
+
+import cython
+import numpy as np
+cimport numpy as np
+
+CUDAErrorMessage = 'CUDA error'  # text of the ValueError raised by the wrappers below on a nonzero return code
+# Externally defined entry points; each returns an int, and the wrappers in this file treat 0 as success.
+cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
+cdef extern int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z);
+cdef extern int TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z);
+cdef extern int TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
+cdef extern int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
+cdef extern int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
+cdef extern int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z);
+cdef extern int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
+cdef extern int PatchSelect_GPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
+
+# Total-variation Rudin-Osher-Fatemi (ROF)
+def TV_ROF_GPU(inputData, regularisation_parameter, iterations, time_marching_parameter):
+    """Total-variation Rudin-Osher-Fatemi (ROF) regularisation on the GPU.
+
+    ROFTV2D handles 2D arrays and ROFTV3D handles 3D arrays; the remaining
+    arguments are forwarded unchanged. Any other rank returns None.
+    """
+    rank = inputData.ndim
+    if rank == 2:
+        backend = ROFTV2D
+    elif rank == 3:
+        backend = ROFTV3D
+    else:
+        return None
+    return backend(inputData, regularisation_parameter, iterations, time_marching_parameter)
+                     
+# Total-variation Fast-Gradient-Projection (FGP)
+def TV_FGP_GPU(inputData,
+               regularisation_parameter,
+               iterations,
+               tolerance_param,
+               methodTV,
+               nonneg,
+               printM):
+    """Total-variation Fast-Gradient-Projection (FGP) regularisation on the GPU.
+
+    Routes the call by the rank of inputData: FGPTV2D for 2D arrays and
+    FGPTV3D for 3D arrays. All remaining arguments are forwarded unchanged
+    and in the same order. For any other rank nothing is called and None
+    is returned.
+    """
+    backends = {2: FGPTV2D, 3: FGPTV3D}
+    backend = backends.get(inputData.ndim)
+    if backend is None:
+        return None
+    return backend(inputData,
+                   regularisation_parameter,
+                   iterations,
+                   tolerance_param,
+                   methodTV, nonneg, printM)
+# Total-variation Split Bregman (SB)
+def TV_SB_GPU(inputData, regularisation_parameter, iterations,
+              tolerance_param, methodTV, printM):
+    """Total-variation Split Bregman (SB) regularisation on the GPU.
+
+    A 2D inputData is handled by SBTV2D and a 3D one by SBTV3D; the other
+    arguments are passed through as given. Arrays of any other rank are
+    not processed and the function returns None.
+    """
+    forwarded = (inputData,
+                 regularisation_parameter,
+                 iterations,
+                 tolerance_param,
+                 methodTV,
+                 printM)
+    rank = inputData.ndim
+    if rank == 2:
+        return SBTV2D(*forwarded)
+    if rank == 3:
+        return SBTV3D(*forwarded)
+    return None
+# LLT-ROF model
+def LLT_ROF_GPU(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter):
+    """LLT-ROF model on the GPU: LLT_ROF_GPU2D for 2D input, LLT_ROF_GPU3D for 3D input, None for any other rank."""
+    if inputData.ndim not in (2, 3):
+        return None
+    return (LLT_ROF_GPU2D if inputData.ndim == 2 else LLT_ROF_GPU3D)(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
+# Total Generalised Variation (TGV)
+def TGV_GPU(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst):
+    """Total Generalised Variation on the GPU: TGV2D for 2D input, TGV3D for 3D input, None for any other rank."""
+    worker = {2: TGV2D, 3: TGV3D}.get(inputData.ndim)
+    if worker is not None:
+        return worker(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst)
+# Directional Total-variation Fast-Gradient-Projection (FGP)
+def dTV_FGP_GPU(inputData,
+                refdata,
+                regularisation_parameter,
+                iterations,
+                tolerance_param,
+                eta_const,
+                methodTV,
+                nonneg,
+                printM):
+    """Directional total-variation Fast-Gradient-Projection (FGP) on the GPU.
+
+    The rank of inputData selects the implementation: FGPdTV2D for 2D
+    arrays and FGPdTV3D for 3D arrays. refdata and every other argument
+    are forwarded unchanged, in the same order. For any other rank no
+    implementation is called and None is returned.
+    """
+    rank = inputData.ndim
+    if rank not in (2, 3):
+        return None
+    backend = FGPdTV2D if rank == 2 else FGPdTV3D
+    return backend(inputData,
+                   refdata,
+                   regularisation_parameter,
+                   iterations,
+                   tolerance_param,
+                   eta_const,
+                   methodTV,
+                   nonneg,
+                   printM)
+# Nonlinear Isotropic Diffusion (NDF)
+def NDF_GPU(inputData,
+            regularisation_parameter,
+            edge_parameter,
+            iterations,
+            time_marching_parameter,
+            penalty_type):
+    """Nonlinear isotropic diffusion (NDF) on the GPU.
+
+    2D arrays are processed by NDF_GPU_2D and 3D arrays by NDF_GPU_3D,
+    with all other arguments forwarded unchanged. Any other rank is not
+    processed and None is returned.
+    """
+    if inputData.ndim == 2:
+        backend = NDF_GPU_2D
+    elif inputData.ndim == 3:
+        backend = NDF_GPU_3D
+    else:
+        return None
+    return backend(inputData, regularisation_parameter, edge_parameter,
+                   iterations, time_marching_parameter, penalty_type)
+# Anisotropic Fourth-Order diffusion
+def Diff4th_GPU(inputData, regularisation_parameter, edge_parameter, iterations, time_marching_parameter):
+    """Anisotropic fourth-order diffusion on the GPU.
+
+    Diff4th_2D is used for 2D arrays and Diff4th_3D for 3D arrays; the
+    other arguments are forwarded unchanged. For any other rank nothing
+    is called and the function returns None.
+    """
+    call_args = (inputData,
+                 regularisation_parameter,
+                 edge_parameter,
+                 iterations,
+                 time_marching_parameter)
+    rank = inputData.ndim
+    if rank == 2:
+        return Diff4th_2D(*call_args)
+    if rank == 3:
+        return Diff4th_3D(*call_args)
+                     
+#****************************************************************#
+#********************** Total-variation ROF *********************#
+#****************************************************************#
+def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     int iterations, 
+                     float time_marching_parameter):
+    """Run TV_ROF_GPU_main on a 2D float32 array; return the output array or raise ValueError(CUDAErrorMessage)."""
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+		    np.zeros([dims[0],dims[1]], dtype='float32')
+    # Sizes go in as N=shape[1], M=shape[0], Z=1 (names as in the extern declaration above).
+    # Running CUDA code here; a zero return value is treated as success.
+    if (TV_ROF_GPU_main(
+            &inputData[0,0], &outputData[0,0], 
+                       regularisation_parameter,
+                       iterations , 
+                       time_marching_parameter, 
+                       dims[1], dims[0], 1)==0):
+        return outputData;
+    else:
+        raise ValueError(CUDAErrorMessage);
+    
+def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     int iterations, 
+                     float time_marching_parameter):
+    """Run TV_ROF_GPU_main on a 3D float32 array; return the output array or raise ValueError(CUDAErrorMessage)."""
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+    # Sizes go in as N=shape[2], M=shape[1], Z=shape[0] (names as in the extern declaration above).
+    # Running CUDA code here; a zero return value is treated as success.
+    if (TV_ROF_GPU_main(
+            &inputData[0,0,0], &outputData[0,0,0], 
+                       regularisation_parameter,
+                       iterations , 
+                       time_marching_parameter, 
+                       dims[2], dims[1], dims[0])==0):
+        return outputData;
+    else:
+        raise ValueError(CUDAErrorMessage);
+#****************************************************************#
+#********************** Total-variation FGP *********************#
+#****************************************************************#
+#******** Total-variation Fast-Gradient-Projection (FGP)*********#
+def FGPTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     int iterations, 
+                     float tolerance_param,
+                     int methodTV,
+                     int nonneg,
+                     int printM):
+    """Run TV_FGP_GPU_main on a 2D float32 array; return the output array or raise ValueError(CUDAErrorMessage)."""
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+		    np.zeros([dims[0],dims[1]], dtype='float32')
+    # Sizes go in as N=shape[1], M=shape[0], Z=1 (names as in the extern declaration above).
+    # Running CUDA code here; a zero return value is treated as success.
+    if (TV_FGP_GPU_main(&inputData[0,0], &outputData[0,0],
+                       regularisation_parameter, 
+                       iterations, 
+                       tolerance_param,
+                       methodTV,
+                       nonneg,
+                       printM,
+                       dims[1], dims[0], 1)==0):
+        return outputData;
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+    
+def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     int iterations, 
+                     float tolerance_param,
+                     int methodTV,
+                     int nonneg,
+                     int printM):
+    """Run TV_FGP_GPU_main on a 3D float32 array; return the output array or raise ValueError(CUDAErrorMessage)."""
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+    # Sizes go in as N=shape[2], M=shape[1], Z=shape[0] (names as in the extern declaration above).
+    # Running CUDA code here; a zero return value is treated as success.
+    if (TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0],
+                       regularisation_parameter , 
+                       iterations, 
+                       tolerance_param,
+                       methodTV,
+                       nonneg,
+                       printM,
+                       dims[2], dims[1], dims[0])==0):
+        return outputData;
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+#***************************************************************#
+#********************** Total-variation SB *********************#
+#***************************************************************#
+#*************** Total-variation Split Bregman (SB)*************#
+def SBTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     int iterations, 
+                     float tolerance_param,
+                     int methodTV,
+                     int printM):
+    """Run TV_SB_GPU_main on a 2D float32 array; return the output array or raise ValueError(CUDAErrorMessage)."""
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+		    np.zeros([dims[0],dims[1]], dtype='float32')
+    # Sizes go in as N=shape[1], M=shape[0], Z=1 (names as in the extern declaration above).
+    # Running CUDA code here; a zero return value is treated as success.
+    if (TV_SB_GPU_main(&inputData[0,0], &outputData[0,0],
+                       regularisation_parameter, 
+                       iterations, 
+                       tolerance_param,
+                       methodTV,
+                       printM,
+                       dims[1], dims[0], 1)==0):
+        return outputData;
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+    
+def SBTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     int iterations, 
+                     float tolerance_param,
+                     int methodTV,
+                     int printM):
+    """Run TV_SB_GPU_main on a 3D float32 array; return the output array or raise ValueError(CUDAErrorMessage)."""
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+    # Sizes go in as N=shape[2], M=shape[1], Z=shape[0] (names as in the extern declaration above).
+    # Running CUDA code here; a zero return value is treated as success.
+    if (TV_SB_GPU_main(&inputData[0,0,0], &outputData[0,0,0],
+                       regularisation_parameter , 
+                       iterations, 
+                       tolerance_param,
+                       methodTV,
+                       printM,
+                       dims[2], dims[1], dims[0])==0):
+        return outputData;
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+
+#***************************************************************#
+#************************ LLT-ROF model ************************#
+#***************************************************************#
+#************Joint LLT-ROF model for higher order **************#
+def LLT_ROF_GPU2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+                     float regularisation_parameterROF,
+                     float regularisation_parameterLLT,
+                     int iterations, 
+                     float time_marching_parameter):
+    """Run LLT_ROF_GPU_main on a 2D float32 array; return the output array or raise ValueError(CUDAErrorMessage)."""
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+		    np.zeros([dims[0],dims[1]], dtype='float32')
+    # Sizes go in as N=shape[1], M=shape[0], Z=1 (names as in the extern declaration above).
+    # Running CUDA code here; a zero return value is treated as success.
+    if (LLT_ROF_GPU_main(&inputData[0,0], &outputData[0,0],regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[1],dims[0],1)==0):
+        return outputData;
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+    
+def LLT_ROF_GPU3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
+                     float regularisation_parameterROF,
+                     float regularisation_parameterLLT,
+                     int iterations, 
+                     float time_marching_parameter):
+    """Run LLT_ROF_GPU_main on a 3D float32 array; return the output array or raise ValueError(CUDAErrorMessage)."""
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+    # Sizes go in as N=shape[2], M=shape[1], Z=shape[0] (names as in the extern declaration above).
+    # Running CUDA code here; a zero return value is treated as success.
+    if (LLT_ROF_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[2], dims[1], dims[0])==0):
+        return outputData;
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+
+#***************************************************************#
+#***************** Total Generalised Variation *****************#
+#***************************************************************#
+def TGV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     float alpha1,
+                     float alpha0,
+                     int iterationsNumb, 
+                     float LipshitzConst):
+    """Run TGV_GPU_main on a 2D float32 array; return the output array or raise ValueError(CUDAErrorMessage)."""
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1]], dtype='float32')
+    # Sizes go in as dimX=shape[1], dimY=shape[0], dimZ=1; LipshitzConst fills the L2 slot of the extern declaration.
+    # Run TGV iterations for 2D data; a zero return value is treated as success.
+    if (TGV_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter,
+                       alpha1,
+                       alpha0,
+                       iterationsNumb, 
+                       LipshitzConst,
+                       dims[1],dims[0], 1)==0):
+        return outputData
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+def TGV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     float alpha1,
+                     float alpha0,
+                     int iterationsNumb, 
+                     float LipshitzConst):
+    """Run TGV_GPU_main on a 3D float32 array; return the output array or raise ValueError(CUDAErrorMessage)."""
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+    # Sizes go in as dimX=shape[2], dimY=shape[1], dimZ=shape[0]; LipshitzConst fills the L2 slot of the extern declaration.
+    # Running CUDA code here; a zero return value is treated as success.
+    if (TGV_GPU_main(
+            &inputData[0,0,0], &outputData[0,0,0], regularisation_parameter,
+                       alpha1,
+                       alpha0,
+                       iterationsNumb, 
+                       LipshitzConst,
+                       dims[2], dims[1], dims[0])==0):
+        return outputData;
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+
+#****************************************************************#
+#**************Directional Total-variation FGP ******************#
+#****************************************************************#
+#******** Directional TV Fast-Gradient-Projection (FGP)*********#
+def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+             np.ndarray[np.float32_t, ndim=2, mode="c"] refdata,
+                     float regularisation_parameter,
+                     int iterations, 
+                     float tolerance_param,
+                     float eta_const,
+                     int methodTV,
+                     int nonneg,
+                     int printM):
+    """Run dTV_FGP_GPU_main on 2D float32 input and reference arrays; return the output or raise ValueError."""
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+		    np.zeros([dims[0],dims[1]], dtype='float32')
+    # refdata fills the InputRef slot; sizes go in as N=shape[1], M=shape[0], Z=1 (names from the extern declaration).
+    # Running CUDA code here; a zero return value is treated as success.
+    if (dTV_FGP_GPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0],
+                       regularisation_parameter, 
+                       iterations, 
+                       tolerance_param,
+                       eta_const,
+                       methodTV,
+                       nonneg,
+                       printM,
+                       dims[1], dims[0], 1)==0):
+        return outputData
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+    
+def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
+             np.ndarray[np.float32_t, ndim=3, mode="c"] refdata, 
+                     float regularisation_parameter,
+                     int iterations, 
+                     float tolerance_param,
+                     float eta_const,
+                     int methodTV,
+                     int nonneg,
+                     int printM):
+    """Run dTV_FGP_GPU_main on 3D float32 input and reference arrays; return the output or raise ValueError."""
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+    # refdata fills the InputRef slot; sizes go in as N=shape[2], M=shape[1], Z=shape[0] (names from the extern declaration).
+    # Running CUDA code here; a zero return value is treated as success.
+    if (dTV_FGP_GPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0],
+                       regularisation_parameter , 
+                       iterations, 
+                       tolerance_param,
+                       eta_const,
+                       methodTV,
+                       nonneg,
+                       printM,
+                       dims[2], dims[1], dims[0])==0):
+        return outputData;
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+
+#****************************************************************#
+#***************Nonlinear (Isotropic) Diffusion******************#
+#****************************************************************#
+def NDF_GPU_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     float edge_parameter,
+                     int iterationsNumb,                     
+                     float time_marching_parameter,
+                     int penalty_type):
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    # Wrapper around NonlDiff_GPU_main for a 2D float32 array; the output is zero-filled with the input's shape.
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1]], dtype='float32')
+    
+    # NOTE(review): penalty_type is forwarded to the CUDA routine without a range check in this
+    # wrapper. A draft check that used to be commented out here named the intended values as
+    # 1 = Huber, 2 = Perona-Malik, 3 = Tukey Biweight.
+    
+    # Run Nonlinear Diffusion iterations for 2D data; sizes go in as N=shape[1], M=shape[0], Z=1.
+    # Running CUDA code here; a zero return value is success, anything else raises ValueError(CUDAErrorMessage).
+    if (NonlDiff_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[1], dims[0], 1)==0):
+        return outputData;
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+            
+def NDF_GPU_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     float edge_parameter,
+                     int iterationsNumb,                     
+                     float time_marching_parameter,
+                     int penalty_type):
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+    # Wrapper around NonlDiff_GPU_main for a 3D float32 array; the output is zero-filled with the input's shape.
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1],dims[2]], dtype='float32')    
+       
+    # Run Nonlinear Diffusion iterations for 3D data; sizes go in as N=shape[2], M=shape[1], Z=shape[0].
+    # Running CUDA code here; a zero return value is success, anything else raises ValueError(CUDAErrorMessage).
+    if (NonlDiff_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[2], dims[1], dims[0])==0):
+        return outputData;
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+#****************************************************************#
+#************Anisotropic Fourth-Order diffusion******************#
+#****************************************************************#
+def Diff4th_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     float edge_parameter,
+                     int iterationsNumb,
+                     float time_marching_parameter):
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    # Wrapper around Diffus4th_GPU_main for a 2D float32 array; the output is zero-filled with the input's shape.
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1]], dtype='float32')
+    
+    # Run Anisotropic Fourth-Order diffusion for 2D data; sizes go in as N=shape[1], M=shape[0], Z=1.
+    # Running CUDA code here; a zero return value is success, anything else raises ValueError(CUDAErrorMessage).
+    if (Diffus4th_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[1], dims[0], 1)==0):
+        return outputData
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+            
+def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, 
+                     float regularisation_parameter,
+                     float edge_parameter,
+                     int iterationsNumb,
+                     float time_marching_parameter):
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+    # Wrapper around Diffus4th_GPU_main for a 3D float32 array; the output is zero-filled with the input's shape.
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1],dims[2]], dtype='float32')    
+       
+    # Run Anisotropic Fourth-Order diffusion for 3D data; sizes go in as N=shape[2], M=shape[1], Z=shape[0].
+    # Running CUDA code here; a zero return value is success, anything else raises ValueError(CUDAErrorMessage).
+    if (Diffus4th_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[2], dims[1], dims[0])==0):
+        return outputData;
+    else:
+        raise ValueError(CUDAErrorMessage);
+
+#****************************************************************#
+#************Patch-based weights pre-selection******************#
+#****************************************************************#
+def PATCHSEL_GPU(inputData, searchwindow, patchwindow, neighbours, edge_parameter):
+    """Patch-based weight pre-selection via PatchSel_2D for 2D input; 3D input only yields the placeholder 1, other ranks None."""
+    if inputData.ndim != 2:
+        return 1 if inputData.ndim == 3 else None
+    return PatchSel_2D(inputData, searchwindow, patchwindow, neighbours, edge_parameter)
+def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+                     int searchwindow,
+                     int patchwindow,
+                     int neighbours,
+                     float edge_parameter):
+    cdef long dims[3]
+    dims[0] = neighbours
+    dims[1] = inputData.shape[0]
+    dims[2] = inputData.shape[1]    
+    # dims is (neighbours, shape[0], shape[1]); Weights (float32), H_i and H_j (uint16) are all allocated with it.
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] Weights = \
+            np.zeros([dims[0], dims[1],dims[2]], dtype='float32')
+    
+    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_i = \
+            np.zeros([dims[0], dims[1],dims[2]], dtype='uint16')
+            
+    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_j = \
+            np.zeros([dims[0], dims[1],dims[2]], dtype='uint16')
+    # Run patch-based weight selection; returns (H_i, H_j, Weights) on a zero return code, else raises ValueError.
+    # NOTE(review): H_j fills the extern's H_i slot and H_i its H_j slot, with N=shape[1], M=shape[0] - presumably a deliberate axis swap; confirm.
+    if (PatchSelect_GPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], searchwindow, patchwindow,  neighbours,  edge_parameter)==0):
+        return H_i, H_j, Weights;
+    else:
+        raise ValueError(CUDAErrorMessage);
+
diff --git a/test/lena_gray_512.tif b/test/lena_gray_512.tif
new file mode 100644
index 0000000..f80cafc
Binary files /dev/null and b/test/lena_gray_512.tif differ
diff --git a/test/test_ROF_TV.py b/test/test_ROF_TV.py
new file mode 100644
index 0000000..dda38b7
--- /dev/null
+++ b/test/test_ROF_TV.py
@@ -0,0 +1,127 @@
+import unittest
+import math
+import os
+import timeit
+from ccpi.filters.regularisers import ROF_TV
+#, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
+from testroutines import *
+
+class TestRegularisers(unittest.TestCase):
+
+    def test_ROF_TV_CPU(self):
+        filename = os.path.join("lena_gray_512.tif")
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)
+        Im = np.asarray(Im, dtype='float32')
+
+        Im = Im / 255
+        perc = 0.05
+        u0 = Im + np.random.normal(loc=0,
+                                   scale=perc * Im,
+                                   size=np.shape(Im))
+        u_ref = Im + np.random.normal(loc=0,
+                                      scale=0.01 * Im,
+                                      size=np.shape(Im))
+
+        # map the u0 u0->u0>0
+        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+        u0 = u0.astype('float32')
+        u_ref = u_ref.astype('float32')
+
+
+        # set parameters
+        pars = {'algorithm': ROF_TV, \
+                'input': u0, \
+                'regularisation_parameter': 0.04, \
+                'number_of_iterations': 2500, \
+                'time_marching_parameter': 0.00002
+                }
+        print("#############ROF TV CPU####################")
+        start_time = timeit.default_timer()
+        rof_cpu = ROF_TV(pars['input'],
+                         pars['regularisation_parameter'],
+                         pars['number_of_iterations'],
+                         pars['time_marching_parameter'], 'cpu')
+        rms = rmse(Im, rof_cpu)
+        pars['rmse'] = rms
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time', timeit.default_timer() - start_time)
+        print(txtstr)
+
+        self.assertTrue(math.isclose(rms,0.02067839,rel_tol=1e-2))
+
+
+    def test_ROF_TV_CPU_vs_GPU(self):
+        # print ("tomas debug test function")
+        print(__name__)
+        self.fail("testfail2")
+        filename = os.path.join("lena_gray_512.tif")
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)
+        Im = np.asarray(Im, dtype='float32')
+
+        Im = Im / 255
+        perc = 0.05
+        u0 = Im + np.random.normal(loc=0,
+                                   scale=perc * Im,
+                                   size=np.shape(Im))
+        u_ref = Im + np.random.normal(loc=0,
+                                      scale=0.01 * Im,
+                                      size=np.shape(Im))
+
+        # map the u0 u0->u0>0
+        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+        u0 = u0.astype('float32')
+        u_ref = u_ref.astype('float32')
+
+        print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print("____________ROF-TV bench___________________")
+        print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+        # set parameters
+        pars = {'algorithm': ROF_TV, \
+                'input': u0, \
+                'regularisation_parameter': 0.04, \
+                'number_of_iterations': 2500, \
+                'time_marching_parameter': 0.00002
+                }
+        print("##############ROF TV GPU##################")
+        start_time = timeit.default_timer()
+        try:
+            rof_gpu = ROF_TV(pars['input'],
+                             pars['regularisation_parameter'],
+                             pars['number_of_iterations'],
+                             pars['time_marching_parameter'], 'gpu')
+        except ValueError as ve:
+            self.skipTest("Results not comparable. GPU computing error.")
+
+        rms = rmse(Im, rof_gpu)
+        pars['rmse'] = rms
+        pars['algorithm'] = ROF_TV
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time', timeit.default_timer() - start_time)
+        print(txtstr)
+
+        print("#############ROF TV CPU####################")
+        start_time = timeit.default_timer()
+        rof_cpu = ROF_TV(pars['input'],
+                         pars['regularisation_parameter'],
+                         pars['number_of_iterations'],
+                         pars['time_marching_parameter'], 'cpu')
+        rms = rmse(Im, rof_cpu)
+        pars['rmse'] = rms
+
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time', timeit.default_timer() - start_time)
+        print(txtstr)
+        print("--------Compare the results--------")
+        tolerance = 1e-04
+        diff_im = np.zeros(np.shape(rof_cpu))
+        diff_im = abs(rof_cpu - rof_gpu)
+        diff_im[diff_im > tolerance] = 1
+        self.assertLessEqual(diff_im.sum(), 1)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/testroutines.py b/test/testroutines.py
new file mode 100644
index 0000000..8da5c5e
--- /dev/null
+++ b/test/testroutines.py
@@ -0,0 +1,37 @@
+import numpy as np
+from PIL import Image
+
+class TiffReader(object):  # minimal image reader used by the tests
+    def imread(self, filename):  # opens `filename` with PIL and returns it as a numpy array
+        return np.asarray(Image.open(filename))
+
+
+###############################################################################
+def printParametersToString(pars):
+    txt = r''
+    for key, value in pars.items():
+        if key == 'algorithm':
+            txt += "{0} = {1}".format(key, value.__name__)
+        elif key == 'input':
+            txt += "{0} = {1}".format(key, np.shape(value))
+        elif key == 'refdata':
+            txt += "{0} = {1}".format(key, np.shape(value))
+        else:
+            txt += "{0} = {1}".format(key, value)
+        txt += '\n'
+    return txt
+
+
+def nrmse(im1, im2):
+    rmse = np.sqrt(np.sum((im2 - im1) ** 2) / float(im1.size))
+    max_val = max(np.max(im1), np.max(im2))
+    min_val = min(np.min(im1), np.min(im2))
+    return 1 - (rmse / (max_val - min_val))
+
+
+def rmse(im1, im2):
+    rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size))
+    return rmse
+
+
+###############################################################################
-- 
cgit v1.2.3


From b9dc6d0b947a2287d6761e0b0a35525dd0fe7e7f Mon Sep 17 00:00:00 2001
From: Tomas Kulhanek <tomas.kulhanek@stfc.ac.uk>
Date: Thu, 21 Feb 2019 04:09:41 -0500
Subject: FIX: directory links, install instructions

---
 CMakeLists.txt                      |  4 ++--
 Readme.md                           | 19 +++++++++++++++++--
 recipe/build.sh                     |  8 ++++----
 recipe/meta.yaml                    |  2 +-
 src/CMakeLists.txt                  |  1 +
 src/Python/CMakeLists.txt           | 24 ++++++++++++------------
 src/Python/setup-regularisers.py.in | 24 ++++++++++++------------
 7 files changed, 49 insertions(+), 33 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5d3bbbd..043f13c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -46,7 +46,7 @@ set(MATLAB_DEST "${CMAKE_INSTALL_PREFIX}/matlab")
 endif()
 message(STATUS "Matlab wrappers will be installed in " ${MATLAB_DEST})
 
-set(PYTHON_DEST_DIR "" CACHE PATH "Directory of the Matlab wrappers")
+set(PYTHON_DEST_DIR "" CACHE PATH "Directory of the Python wrappers")
 if (PYTHON_DEST_DIR)
  set(PYTHON_DEST "${PYTHON_DEST_DIR}")
 else() 
@@ -55,5 +55,5 @@ endif()
 message(STATUS "Python wrappers will be installed in " ${PYTHON_DEST})
 
 
-add_subdirectory(src/Core)
+#add_subdirectory(src/Core)
 add_subdirectory(src)
diff --git a/Readme.md b/Readme.md
index ebd4d20..a9433d2 100644
--- a/Readme.md
+++ b/Readme.md
@@ -50,13 +50,28 @@
 
 ## Installation:
 
-The package comes as a [CMake](https://cmake.org) project so you will need CMake (v.>=3) to configure it. Additionally you will need a C compiler, `make` (on linux) and CUDA SDK where available. The toolkit may be used directly from C/C++ as it is compiled as a shared library (check-out the include files in `Core` for this). We provide wrappers for Python and Matlab.
+The package comes as a [CMake](https://cmake.org) project
+and additional wrappers for Python and Matlab.
 
+To install precompiled binaries, you need `conda`; install the package from the `ccpi` channel using:
+```
+conda install ccpi-regulariser -c ccpi -c conda-forge
+```
+
+In order to compile C/C++ sources and additional wrappers from source code, the recommended way is:
+```
+git clone https://github.com/vais-ral/CCPi-Regularisation-Toolkit
+cd CCPi-Regularisation-Toolkit
+build/jenkins-build.sh
+```
+this will install `conda build` environment and compiles C/C++ and Python wrappers and performs basic tests.
+
+### CMake
+If you want to build directly using cmake, install CMake (v.>=3) to configure it. Additionally you will need a C compiler, `make` (on linux) and CUDA SDK where available. The toolkit may be used directly from C/C++ as it is compiled as a shared library (check-out the include files in `Core` for this) 
 1. Clone this repository to a directory, i.e. `CCPi-Regularisation-Toolkit`, 
 2. create a build directory. 
 3. Issue `cmake` to configure (or `cmake-gui`, or `ccmake`, or `cmake3`). Use additional flags to fine tune the configuration. 
 
-### CMake flags
 Flags used during configuration
 
 | CMake flag | type | meaning |
diff --git a/recipe/build.sh b/recipe/build.sh
index 1d54b6f..a156193 100644
--- a/recipe/build.sh
+++ b/recipe/build.sh
@@ -1,8 +1,8 @@
 
-mkdir "$SRC_DIR/ccpi"
-cp -rv "$RECIPE_DIR/../src/Matlab" "$SRC_DIR/ccpi"
-cp -rv "$RECIPE_DIR/../src/Python" "$SRC_DIR/ccpi"
-cp -rv "$RECIPE_DIR/../src/Core" "$SRC_DIR/Core"
+#mkdir "$SRC_DIR/ccpi"
+#cp -rv "$RECIPE_DIR/../src/Matlab" "$SRC_DIR/ccpi"
+#cp -rv "$RECIPE_DIR/../src/Python" "$SRC_DIR/ccpi"
+#cp -rv "$RECIPE_DIR/../src/Core" "$SRC_DIR/Core"
 
 cd $SRC_DIR
 ##cuda=off
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 7435b2b..61d17bd 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -10,7 +10,7 @@ build:
   
 test:
   files:
-    - lena_gray_512.tif
+    - ../test/lena_gray_512.tif
   requires:
     - pillow=4.1.1
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index bdcb8f4..5fe1a57 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -11,6 +11,7 @@
 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
+add_subdirectory(Core)
 if (BUILD_MATLAB_WRAPPER)
     add_subdirectory(Matlab)
 endif()
diff --git a/src/Python/CMakeLists.txt b/src/Python/CMakeLists.txt
index c2ef855..ab95ecc 100644
--- a/src/Python/CMakeLists.txt
+++ b/src/Python/CMakeLists.txt
@@ -7,7 +7,7 @@ project(regulariserPython)
 # The version number.
 
 #set (CIL_VERSION $ENV{CIL_VERSION} CACHE INTERNAL "Core Imaging Library version" FORCE)
-
+message("Creating Python Wrapper")
 # conda orchestrated build
 message("CIL_VERSION: ${CIL_VERSION}")
 #include (GenerateExportHeader)
@@ -19,7 +19,7 @@ endif()
 
 	
 ## Build the regularisers package as a library
-message("Creating Regularisers as shared library")
+#TODO message("Creating Regularisers as shared library")
 
 message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
 
@@ -87,7 +87,7 @@ if (PYTHONINTERP_FOUND)
     set(SETUP_PY_IN "${CMAKE_CURRENT_SOURCE_DIR}/setup-regularisers.py.in")
     set(SETUP_PY    "${CMAKE_CURRENT_BINARY_DIR}/setup-regularisers.py")
     #set(DEPS        "${CMAKE_CURRENT_SOURCE_DIR}/module/__init__.py")
-    set (DEPS       "${CMAKE_BINARY_DIR}/Core/")
+    set (DEPS       "${CMAKE_BINARY_DIR}/src/Core/")
     set(OUTPUT      "${CMAKE_CURRENT_BINARY_DIR}/build/timestamp")
 
     configure_file(${SETUP_PY_IN} ${SETUP_PY})
@@ -99,9 +99,9 @@ if (PYTHONINTERP_FOUND)
                        COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src
                        COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/ccpi ${CMAKE_CURRENT_BINARY_DIR}/ccpi
                        COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
-                                                       PREFIX=${CMAKE_SOURCE_DIR}/Core 
-                                                       LIBRARY_INC=${CMAKE_SOURCE_DIR}/Core 
-                                                       LIBRARY_LIB=${CMAKE_BINARY_DIR}/Core
+                                                       PREFIX=${CMAKE_SOURCE_DIR}/src/Core
+                                                       LIBRARY_INC=${CMAKE_SOURCE_DIR}/src/Core
+                                                       LIBRARY_LIB=${CMAKE_BINARY_DIR}/src/Core
                                                        ${PYTHON_EXECUTABLE} ${SETUP_PY} install
                        COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
                        DEPENDS cilreg)
@@ -112,9 +112,9 @@ if (PYTHONINTERP_FOUND)
                        COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src
                        COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/ccpi ${CMAKE_CURRENT_BINARY_DIR}/ccpi
                        COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
-                                                       PREFIX=${CMAKE_SOURCE_DIR}/Core 
-                                                       LIBRARY_INC=${CMAKE_SOURCE_DIR}/Core 
-                                                       LIBRARY_LIB=${CMAKE_BINARY_DIR}/Core/${CMAKE_BUILD_TYPE}
+                                                       PREFIX=${CMAKE_SOURCE_DIR}/src/Core
+                                                       LIBRARY_INC=${CMAKE_SOURCE_DIR}/src/Core
+                                                       LIBRARY_LIB=${CMAKE_BINARY_DIR}/src/Core/${CMAKE_BUILD_TYPE}
                                                        ${PYTHON_EXECUTABLE} ${SETUP_PY} build_ext --inplace
                        COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
                        DEPENDS cilreg)
@@ -123,9 +123,9 @@ if (PYTHONINTERP_FOUND)
                        COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src
                        COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/ccpi ${CMAKE_CURRENT_BINARY_DIR}/ccpi
                        COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
-                                                       PREFIX=${CMAKE_SOURCE_DIR}/Core 
-                                                       LIBRARY_INC=${CMAKE_SOURCE_DIR}/Core 
-                                                       LIBRARY_LIB=${CMAKE_BINARY_DIR}/Core
+                                                       PREFIX=${CMAKE_SOURCE_DIR}/src/Core
+                                                       LIBRARY_INC=${CMAKE_SOURCE_DIR}/src/Core
+                                                       LIBRARY_LIB=${CMAKE_BINARY_DIR}/src/Core
                                                        ${PYTHON_EXECUTABLE} ${SETUP_PY} build_ext --inplace
                        COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
                        DEPENDS cilreg)
diff --git a/src/Python/setup-regularisers.py.in b/src/Python/setup-regularisers.py.in
index 462edda..59be768 100644
--- a/src/Python/setup-regularisers.py.in
+++ b/src/Python/setup-regularisers.py.in
@@ -34,18 +34,18 @@ extra_libraries = ['cilreg']
 
 print ("extra_library_dirs " , extra_library_dirs)
 
-extra_include_dirs += [os.path.join(".." , ".." , "Core"),
-                       os.path.join(".." , ".." , "Core",  "regularisers_CPU"),
-                       os.path.join(".." , ".." , "Core",  "inpainters_CPU"),
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_FGP" ) , 
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_ROF" ) , 
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_SB" ) ,
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TGV" ) ,
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "LLTROF" ) ,
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "NDF" ) ,
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "dTV_FGP" ) , 
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "DIFF4th" ) , 
-                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "PatchSelect" ) ,
+extra_include_dirs += [os.path.join(".." , "Core"),
+                       os.path.join(".." , "Core",  "regularisers_CPU"),
+                       os.path.join(".." , "Core",  "inpainters_CPU"),
+                       os.path.join(".." , "Core",  "regularisers_GPU" , "TV_FGP" ) ,
+                       os.path.join(".." , "Core",  "regularisers_GPU" , "TV_ROF" ) ,
+                       os.path.join(".." , "Core",  "regularisers_GPU" , "TV_SB" ) ,
+                       os.path.join(".." , "Core",  "regularisers_GPU" , "TGV" ) ,
+                       os.path.join(".." , "Core",  "regularisers_GPU" , "LLTROF" ) ,
+                       os.path.join(".." , "Core",  "regularisers_GPU" , "NDF" ) ,
+                       os.path.join(".." , "Core",  "regularisers_GPU" , "dTV_FGP" ) ,
+                       os.path.join(".." , "Core",  "regularisers_GPU" , "DIFF4th" ) ,
+                       os.path.join(".." , "Core",  "regularisers_GPU" , "PatchSelect" ) ,
 						   "."]
 
 if platform.system() == 'Windows':				   
-- 
cgit v1.2.3


From c8a60f57df5a019b2b7295933dc0299d88f1e35c Mon Sep 17 00:00:00 2001
From: Tomas Kulhanek <tomas.kulhanek@stfc.ac.uk>
Date: Thu, 21 Feb 2019 09:48:33 -0500
Subject: ADD: instruction to build one variant

---
 Readme.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Readme.md b/Readme.md
index a9433d2..fe8fce8 100644
--- a/Readme.md
+++ b/Readme.md
@@ -58,10 +58,11 @@ To install precompiled binaries, you need `conda` and install from `ccpi` channe
 conda install ccpi-regulariser -c ccpi -c conda-forge
 ```
 
-In order to compile C/C++ sources and additional wrappers from source code, the recommended way is:
+In order to compile C/C++ sources and additional wrappers from source code for numpy 1.12 and python 3.6, the recommended way is:
 ```
 git clone https://github.com/vais-ral/CCPi-Regularisation-Toolkit
 cd CCPi-Regularisation-Toolkit
+export CCPI_BUILD_ARGS="--numpy 1.12 --python 3.6"
 build/jenkins-build.sh
 ```
 this will install `conda build` environment and compiles C/C++ and Python wrappers and performs basic tests.
-- 
cgit v1.2.3


From 4505a79103e98adb33bfb4c10391319e56ae7031 Mon Sep 17 00:00:00 2001
From: Tomas Kulhanek <tomas.kulhanek@stfc.ac.uk>
Date: Fri, 22 Feb 2019 06:44:53 -0500
Subject: UPDATE: docs -> demos and update paths in m and py demos

---
 Readme.md                                  |   2 +-
 build/build-install.sh                     |   4 +
 demos/data/SinoInpaint.mat                 | Bin 0 -> 3335061 bytes
 demos/data/lena_gray_512.tif               | Bin 0 -> 262598 bytes
 demos/demoMatlab_3Ddenoise.m               | 178 +++++++
 demos/demoMatlab_denoise.m                 | 189 +++++++
 demos/demoMatlab_inpaint.m                 |  35 ++
 demos/demo_cpu_inpainters.py               | 192 +++++++
 demos/demo_cpu_regularisers.py             | 572 +++++++++++++++++++++
 demos/demo_cpu_regularisers3D.py           | 458 +++++++++++++++++
 demos/demo_cpu_vs_gpu_regularisers.py      | 790 +++++++++++++++++++++++++++++
 demos/demo_gpu_regularisers.py             | 518 +++++++++++++++++++
 demos/demo_gpu_regularisers3D.py           | 460 +++++++++++++++++
 demos/images/TV_vs_NLTV.jpg                | Bin 0 -> 111273 bytes
 demos/images/probl.pdf                     | Bin 0 -> 62326 bytes
 demos/images/probl.png                     | Bin 0 -> 38161 bytes
 demos/images/reg_penalties.jpg             | Bin 0 -> 237455 bytes
 demos/qualitymetrics.py                    |  18 +
 docs/data/SinoInpaint.mat                  | Bin 3335061 -> 0 bytes
 docs/data/lena_gray_512.tif                | Bin 262598 -> 0 bytes
 docs/demos/demoMatlab_3Ddenoise.m          | 178 -------
 docs/demos/demoMatlab_denoise.m            | 189 -------
 docs/demos/demoMatlab_inpaint.m            |  35 --
 docs/demos/demo_cpu_inpainters.py          | 192 -------
 docs/demos/demo_cpu_regularisers.py        | 572 ---------------------
 docs/demos/demo_cpu_regularisers3D.py      | 458 -----------------
 docs/demos/demo_cpu_vs_gpu_regularisers.py | 790 -----------------------------
 docs/demos/demo_gpu_regularisers.py        | 518 -------------------
 docs/demos/demo_gpu_regularisers3D.py      | 460 -----------------
 docs/demos/qualitymetrics.py               |  18 -
 docs/images/TV_vs_NLTV.jpg                 | Bin 111273 -> 0 bytes
 docs/images/probl.pdf                      | Bin 62326 -> 0 bytes
 docs/images/probl.png                      | Bin 38161 -> 0 bytes
 docs/images/reg_penalties.jpg              | Bin 237455 -> 0 bytes
 docs/installation.txt                      |  11 -
 test/test_CPU_regularisers.py              |  91 ++++
 test/test_FGP_TV.py                        | 152 ++++++
 test/test_ROF_TV.py                        |   3 -
 38 files changed, 3658 insertions(+), 3425 deletions(-)
 create mode 100755 build/build-install.sh
 create mode 100644 demos/data/SinoInpaint.mat
 create mode 100644 demos/data/lena_gray_512.tif
 create mode 100644 demos/demoMatlab_3Ddenoise.m
 create mode 100644 demos/demoMatlab_denoise.m
 create mode 100644 demos/demoMatlab_inpaint.m
 create mode 100644 demos/demo_cpu_inpainters.py
 create mode 100644 demos/demo_cpu_regularisers.py
 create mode 100644 demos/demo_cpu_regularisers3D.py
 create mode 100644 demos/demo_cpu_vs_gpu_regularisers.py
 create mode 100644 demos/demo_gpu_regularisers.py
 create mode 100644 demos/demo_gpu_regularisers3D.py
 create mode 100644 demos/images/TV_vs_NLTV.jpg
 create mode 100644 demos/images/probl.pdf
 create mode 100644 demos/images/probl.png
 create mode 100644 demos/images/reg_penalties.jpg
 create mode 100644 demos/qualitymetrics.py
 delete mode 100644 docs/data/SinoInpaint.mat
 delete mode 100644 docs/data/lena_gray_512.tif
 delete mode 100644 docs/demos/demoMatlab_3Ddenoise.m
 delete mode 100644 docs/demos/demoMatlab_denoise.m
 delete mode 100644 docs/demos/demoMatlab_inpaint.m
 delete mode 100644 docs/demos/demo_cpu_inpainters.py
 delete mode 100644 docs/demos/demo_cpu_regularisers.py
 delete mode 100644 docs/demos/demo_cpu_regularisers3D.py
 delete mode 100644 docs/demos/demo_cpu_vs_gpu_regularisers.py
 delete mode 100644 docs/demos/demo_gpu_regularisers.py
 delete mode 100644 docs/demos/demo_gpu_regularisers3D.py
 delete mode 100644 docs/demos/qualitymetrics.py
 delete mode 100644 docs/images/TV_vs_NLTV.jpg
 delete mode 100644 docs/images/probl.pdf
 delete mode 100644 docs/images/probl.png
 delete mode 100644 docs/images/reg_penalties.jpg
 delete mode 100644 docs/installation.txt
 create mode 100644 test/test_CPU_regularisers.py
 create mode 100644 test/test_FGP_TV.py

diff --git a/Readme.md b/Readme.md
index fe8fce8..112d606 100644
--- a/Readme.md
+++ b/Readme.md
@@ -65,7 +65,7 @@ cd CCPi-Regularisation-Toolkit
 export CCPI_BUILD_ARGS="--numpy 1.12 --python 3.6"
 build/jenkins-build.sh
 ```
-this will install `conda build` environment and compiles C/C++ and Python wrappers and performs basic tests.
+This will install a `conda build` environment, compile the C/C++ sources and Python wrappers, and run basic tests in an environment with Python 3.6 and NumPy 1.12.
 
 ### CMake
 If you want to build directly using cmake, install CMake (v.>=3) to configure it. Additionally you will need a C compiler, `make` (on linux) and CUDA SDK where available. The toolkit may be used directly from C/C++ as it is compiled as a shared library (check-out the include files in `Core` for this) 
diff --git a/build/build-install.sh b/build/build-install.sh
new file mode 100755
index 0000000..def66a4
--- /dev/null
+++ b/build/build-install.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+export CCPI_BUILD_ARGS="--numpy 1.12 --python 3.6"  # build variant; presumably read by the jenkins-build.sh script below
+bash <(curl -L https://raw.githubusercontent.com/vais-ral/CCPi-VirtualMachine/master/scripts/jenkins-build.sh)  # NOTE(review): executes a script fetched from the master branch, unpinned and unverified
+conda install -y ccpi-regulariser --use-local --force  # NOTE(review): still runs if the build above failed (no `set -e`)
\ No newline at end of file
diff --git a/demos/data/SinoInpaint.mat b/demos/data/SinoInpaint.mat
new file mode 100644
index 0000000..d748fb4
Binary files /dev/null and b/demos/data/SinoInpaint.mat differ
diff --git a/demos/data/lena_gray_512.tif b/demos/data/lena_gray_512.tif
new file mode 100644
index 0000000..f80cafc
Binary files /dev/null and b/demos/data/lena_gray_512.tif differ
diff --git a/demos/demoMatlab_3Ddenoise.m b/demos/demoMatlab_3Ddenoise.m
new file mode 100644
index 0000000..cdd3117
--- /dev/null
+++ b/demos/demoMatlab_3Ddenoise.m
@@ -0,0 +1,178 @@
+% Volume (3D) denoising demo using CCPi-RGL
+clear; close all
+Path1 = sprintf(['..' filesep 'src' filesep 'Matlab' filesep 'mex_compile' filesep 'installed'], 1i);
+Path2 = sprintf(['data' filesep], 1i);
+Path3 = sprintf(['..' filesep 'src' filesep 'Matlab' filesep 'supp'], 1i);
+addpath(Path1);
+addpath(Path2);
+addpath(Path3);
+
+N = 512; % in-plane size of the volume (N x N)
+slices = 7; % number of slices in the test volume
+vol3D = zeros(N,N,slices, 'single');
+Ideal3D = zeros(N,N,slices, 'single');
+Im = double(imread('lena_gray_512.tif'))/255;  % loading image
+for i = 1:slices
+vol3D(:,:,i) = Im + .05*randn(size(Im)); % noisy copy of the image in every slice
+Ideal3D(:,:,i) = Im; % noise-free reference
+end
+vol3D(vol3D < 0) = 0;
+figure; imshow(vol3D(:,:,slices), [0 1]); title('Noisy image'); % show the last slice (index 15 exceeded the 7 slices)
+
+
+lambda_reg = 0.03; % regularsation parameter for all methods
+%%
+fprintf('Denoise a volume using the ROF-TV model (CPU) \n');
+tau_rof = 0.0025; % time-marching constant 
+iter_rof = 300; % number of ROF iterations
+tic; u_rof = ROF_TV(single(vol3D), lambda_reg, iter_rof, tau_rof); toc; 
+energyfunc_val_rof = TV_energy(single(u_rof),single(vol3D),lambda_reg, 1);  % get energy function value
+rmse_rof = (RMSE(Ideal3D(:),u_rof(:)));
+fprintf('%s %f \n', 'RMSE error for ROF is:', rmse_rof);
+figure; imshow(u_rof(:,:,7), [0 1]); title('ROF-TV denoised volume (CPU)');
+%%
+% fprintf('Denoise a volume using the ROF-TV model (GPU) \n');
+% tau_rof = 0.0025; % time-marching constant 
+% iter_rof = 300; % number of ROF iterations
+% tic; u_rofG = ROF_TV_GPU(single(vol3D), lambda_reg, iter_rof, tau_rof); toc;
+% rmse_rofG = (RMSE(Ideal3D(:),u_rofG(:)));
+% fprintf('%s %f \n', 'RMSE error for ROF is:', rmse_rofG);
+% figure; imshow(u_rofG(:,:,7), [0 1]); title('ROF-TV denoised volume (GPU)');
+%%
+fprintf('Denoise a volume using the FGP-TV model (CPU) \n');
+iter_fgp = 300; % number of FGP iterations
+epsil_tol =  1.0e-05; % tolerance
+tic; u_fgp = FGP_TV(single(vol3D), lambda_reg, iter_fgp, epsil_tol); toc; 
+energyfunc_val_fgp = TV_energy(single(u_fgp),single(vol3D),lambda_reg, 1); % get energy function value
+rmse_fgp = (RMSE(Ideal3D(:),u_fgp(:)));
+fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmse_fgp);
+figure; imshow(u_fgp(:,:,7), [0 1]); title('FGP-TV denoised volume (CPU)');
+%%
+% fprintf('Denoise a volume using the FGP-TV model (GPU) \n');
+% iter_fgp = 300; % number of FGP iterations
+% epsil_tol =  1.0e-05; % tolerance
+% tic; u_fgpG = FGP_TV_GPU(single(vol3D), lambda_reg, iter_fgp, epsil_tol); toc; 
+% rmse_fgpG = (RMSE(Ideal3D(:),u_fgpG(:)));
+% fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmse_fgpG);
+% figure; imshow(u_fgpG(:,:,7), [0 1]); title('FGP-TV denoised volume (GPU)');
+%%
+fprintf('Denoise a volume using the SB-TV model (CPU) \n');
+iter_sb = 150; % number of SB iterations
+epsil_tol =  1.0e-05; % tolerance
+tic; u_sb = SB_TV(single(vol3D), lambda_reg, iter_sb, epsil_tol); toc; 
+energyfunc_val_sb = TV_energy(single(u_sb),single(vol3D),lambda_reg, 1);  % get energy function value
+rmse_sb = (RMSE(Ideal3D(:),u_sb(:)));
+fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmse_sb);
+figure; imshow(u_sb(:,:,7), [0 1]); title('SB-TV denoised volume (CPU)');
+%%
+% fprintf('Denoise a volume using the SB-TV model (GPU) \n');
+% iter_sb = 150; % number of SB iterations
+% epsil_tol =  1.0e-05; % tolerance
+% tic; u_sbG = SB_TV_GPU(single(vol3D), lambda_reg, iter_sb, epsil_tol); toc; 
+% rmse_sbG = (RMSE(Ideal3D(:),u_sbG(:)));
+% fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmse_sbG);
+% figure; imshow(u_sbG(:,:,7), [0 1]); title('SB-TV denoised volume (GPU)');
+%%
+fprintf('Denoise a volume using the ROF-LLT model (CPU) \n');
+lambda_ROF = lambda_reg; % ROF regularisation parameter
+lambda_LLT = lambda_reg*0.35; % LLT regularisation parameter
+iter_LLT = 300; % iterations 
+tau_rof_llt = 0.0025; % time-marching constant 
+tic; u_rof_llt = LLT_ROF(single(vol3D), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
+rmse_rof_llt = (RMSE(Ideal3D(:),u_rof_llt(:)));
+fprintf('%s %f \n', 'RMSE error for ROF-LLT is:', rmse_rof_llt);
+figure; imshow(u_rof_llt(:,:,7), [0 1]); title('ROF-LLT denoised volume (CPU)');
+%%
+% fprintf('Denoise a volume using the ROF-LLT model (GPU) \n');
+% lambda_ROF = lambda_reg; % ROF regularisation parameter
+% lambda_LLT = lambda_reg*0.35; % LLT regularisation parameter
+% iter_LLT = 300; % iterations 
+% tau_rof_llt = 0.0025; % time-marching constant 
+% tic; u_rof_llt_g = LLT_ROF_GPU(single(vol3D), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
+% rmse_rof_llt = (RMSE(Ideal3D(:),u_rof_llt_g(:)));
+% fprintf('%s %f \n', 'RMSE error for ROF-LLT is:', rmse_rof_llt);
+% figure; imshow(u_rof_llt_g(:,:,7), [0 1]); title('ROF-LLT denoised volume (GPU)');
+%%
+fprintf('Denoise a volume using Nonlinear-Diffusion model (CPU) \n');
+iter_diff = 300; % number of diffusion iterations
+lambda_regDiff = 0.025; % regularisation for the diffusivity 
+sigmaPar = 0.015; % edge-preserving parameter
+tau_param = 0.025; % time-marching constant 
+tic; u_diff = NonlDiff(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
+rmse_diff = (RMSE(Ideal3D(:),u_diff(:)));
+fprintf('%s %f \n', 'RMSE error for Diffusion is:', rmse_diff);
+figure; imshow(u_diff(:,:,7), [0 1]); title('Diffusion denoised volume (CPU)');
+%%
+% fprintf('Denoise a volume using Nonlinear-Diffusion model (GPU) \n');
+% iter_diff = 300; % number of diffusion iterations
+% lambda_regDiff = 0.025; % regularisation for the diffusivity 
+% sigmaPar = 0.015; % edge-preserving parameter
+% tau_param = 0.025; % time-marching constant 
+% tic; u_diff_g = NonlDiff_GPU(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
+% rmse_diff = (RMSE(Ideal3D(:),u_diff_g(:)));
+% fprintf('%s %f \n', 'RMSE error for Diffusion is:', rmse_diff);
+% figure; imshow(u_diff_g(:,:,7), [0 1]); title('Diffusion denoised volume (GPU)');
+%%
+fprintf('Denoise using Fourth-order anisotropic diffusion model (CPU) \n');
+iter_diff = 300; % number of diffusion iterations
+lambda_regDiff = 3.5; % regularisation for the diffusivity 
+sigmaPar = 0.02; % edge-preserving parameter
+tau_param = 0.0015; % time-marching constant 
+tic; u_diff4 = Diffusion_4thO(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
+rmse_diff4 = (RMSE(Ideal3D(:),u_diff4(:)));
+fprintf('%s %f \n', 'RMSE error for Anis.Diff of 4th order is:', rmse_diff4);
+figure; imshow(u_diff4(:,:,7), [0 1]); title('Diffusion 4thO denoised volume (CPU)');
+%%
+% fprintf('Denoise using Fourth-order anisotropic diffusion model (GPU) \n');
+% iter_diff = 300; % number of diffusion iterations
+% lambda_regDiff = 3.5; % regularisation for the diffusivity 
+% sigmaPar = 0.02; % edge-preserving parameter
+% tau_param = 0.0015; % time-marching constant 
+% tic; u_diff4_g = Diffusion_4thO_GPU(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
+% rmse_diff4 = (RMSE(Ideal3D(:),u_diff4_g(:)));
+% fprintf('%s %f \n', 'RMSE error for Anis.Diff of 4th order is:', rmse_diff4);
+% figure; imshow(u_diff4_g(:,:,7), [0 1]); title('Diffusion 4thO denoised volume (GPU)');
+%%
+fprintf('Denoise using the TGV model (CPU) \n');
+lambda_TGV = 0.03; % regularisation parameter
+alpha1 = 1.0; % parameter to control the first-order term
+alpha0 = 2.0; % parameter to control the second-order term
+iter_TGV = 500; % number of Primal-Dual iterations for TGV
+tic; u_tgv = TGV(single(vol3D), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
+rmseTGV = RMSE(Ideal3D(:),u_tgv(:));
+fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV);
+figure; imshow(u_tgv(:,:,3), [0 1]); title('TGV denoised volume (CPU)');
+%%
+%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< %
+fprintf('Denoise a volume using the FGP-dTV model (CPU) \n');
+
+% create another volume (reference) with slightly less amount of noise
+vol3D_ref = zeros(N,N,slices, 'single');
+for i = 1:slices
+vol3D_ref(:,:,i) = Im + .01*randn(size(Im)); 
+end
+vol3D_ref(vol3D_ref < 0) = 0;
+% vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
+
+iter_fgp = 300; % number of FGP iterations
+epsil_tol =  1.0e-05; % tolerance
+eta =  0.2; % Reference image gradient smoothing constant
+tic; u_fgp_dtv = FGP_dTV(single(vol3D), single(vol3D_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
+figure; imshow(u_fgp_dtv(:,:,7), [0 1]); title('FGP-dTV denoised volume (CPU)');
+%%
+fprintf('Denoise a volume using the FGP-dTV model (GPU) \n');
+
+% create another volume (reference) with slightly less amount of noise
+vol3D_ref = zeros(N,N,slices, 'single');
+for i = 1:slices
+vol3D_ref(:,:,i) = Im + .01*randn(size(Im)); 
+end
+vol3D_ref(vol3D_ref < 0) = 0;
+% vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
+
+iter_fgp = 300; % number of FGP iterations
+epsil_tol =  1.0e-05; % tolerance
+eta =  0.2; % Reference image gradient smoothing constant
+tic; u_fgp_dtv_g = FGP_dTV_GPU(single(vol3D), single(vol3D_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
+figure; imshow(u_fgp_dtv_g(:,:,7), [0 1]); title('FGP-dTV denoised volume (GPU)');
+%%
diff --git a/demos/demoMatlab_denoise.m b/demos/demoMatlab_denoise.m
new file mode 100644
index 0000000..2031853
--- /dev/null
+++ b/demos/demoMatlab_denoise.m
@@ -0,0 +1,189 @@
+% Image (2D) denoising demo using CCPi-RGL
+clear; close all
+fsep = '/'; % forward slash: accepted by addpath and never parsed as an escape character
+% relative directories that are added to the MATLAB path below
+Path1 = ['..' fsep 'src' fsep 'Matlab' fsep 'mex_compile' fsep 'installed'];
+Path2 = ['data' fsep];
+Path3 = ['..' fsep 'src' fsep 'Matlab' fsep 'supp'];
+addpath(Path1); addpath(Path2); addpath(Path3);
+
+Im = double(imread('lena_gray_512.tif'))/255;  % loading image, scaled by 1/255
+u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0; % add Gaussian noise, clip negatives to zero
+figure; imshow(u0, [0 1]); title('Noisy image');
+
+lambda_reg = 0.03; % regularisation parameter for all methods
+%%
+fprintf('Denoise using the ROF-TV model (CPU) \n');
+tau_rof = 0.0025; % time-marching constant 
+iter_rof = 750; % number of ROF iterations
+tic; u_rof = ROF_TV(single(u0), lambda_reg, iter_rof, tau_rof); toc; 
+energyfunc_val_rof = TV_energy(single(u_rof),single(u0),lambda_reg, 1);  % get energy function value
+rmseROF = (RMSE(u_rof(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for ROF-TV is:', rmseROF);
+figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)');
+%%
+% fprintf('Denoise using the ROF-TV model (GPU) \n');
+% tau_rof = 0.0025; % time-marching constant 
+% iter_rof = 750; % number of ROF iterations
+% tic; u_rofG = ROF_TV_GPU(single(u0), lambda_reg, iter_rof, tau_rof); toc;
+% figure; imshow(u_rofG, [0 1]); title('ROF-TV denoised image (GPU)');
+%%
+fprintf('Denoise using the FGP-TV model (CPU) \n');
+iter_fgp = 1000; % number of FGP iterations
+epsil_tol =  1.0e-06; % tolerance
+tic; u_fgp = FGP_TV(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; 
+energyfunc_val_fgp = TV_energy(single(u_fgp),single(u0),lambda_reg, 1); % get energy function value
+rmseFGP = (RMSE(u_fgp(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmseFGP);
+figure; imshow(u_fgp, [0 1]); title('FGP-TV denoised image (CPU)');
+
+%%
+% fprintf('Denoise using the FGP-TV model (GPU) \n');
+% iter_fgp = 1000; % number of FGP iterations
+% epsil_tol =  1.0e-05; % tolerance
+% tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; 
+% figure; imshow(u_fgpG, [0 1]); title('FGP-TV denoised image (GPU)');
+%%
+fprintf('Denoise using the SB-TV model (CPU) \n');
+iter_sb = 150; % number of SB iterations
+epsil_tol =  1.0e-06; % tolerance
+tic; u_sb = SB_TV(single(u0), lambda_reg, iter_sb, epsil_tol); toc; 
+energyfunc_val_sb = TV_energy(single(u_sb),single(u0),lambda_reg, 1);  % get energy function value
+rmseSB = (RMSE(u_sb(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmseSB);
+figure; imshow(u_sb, [0 1]); title('SB-TV denoised image (CPU)');
+%%
+% fprintf('Denoise using the SB-TV model (GPU) \n');
+% iter_sb = 150; % number of SB iterations
+% epsil_tol =  1.0e-06; % tolerance
+% tic; u_sbG = SB_TV_GPU(single(u0), lambda_reg, iter_sb, epsil_tol); toc; 
+% figure; imshow(u_sbG, [0 1]); title('SB-TV denoised image (GPU)');
+%%
+fprintf('Denoise using the TGV model (CPU) \n');
+lambda_TGV = 0.045; % regularisation parameter
+alpha1 = 1.0; % parameter to control the first-order term
+alpha0 = 2.0; % parameter to control the second-order term
+iter_TGV = 2000; % number of Primal-Dual iterations for TGV
+tic; u_tgv = TGV(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
+rmseTGV = (RMSE(u_tgv(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV);
+figure; imshow(u_tgv, [0 1]); title('TGV denoised image (CPU)');
+%%
+% fprintf('Denoise using the TGV model (GPU) \n');
+% lambda_TGV = 0.045; % regularisation parameter
+% alpha1 = 1.0; % parameter to control the first-order term
+% alpha0 = 2.0; % parameter to control the second-order term
+% iter_TGV = 2000; % number of Primal-Dual iterations for TGV
+% tic; u_tgv_gpu = TGV_GPU(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
+% rmseTGV_gpu = (RMSE(u_tgv_gpu(:),Im(:)));
+% fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV_gpu);
+% figure; imshow(u_tgv_gpu, [0 1]); title('TGV denoised image (GPU)');
+%%
+fprintf('Denoise using the ROF-LLT model (CPU) \n');
+lambda_ROF = lambda_reg; % ROF regularisation parameter
+lambda_LLT = lambda_reg*0.45; % LLT regularisation parameter
+iter_LLT = 500; % iterations (same count as the GPU variant of this cell)
+tau_rof_llt = 0.0025; % time-marching constant 
+tic; u_rof_llt = LLT_ROF(single(u0), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
+rmseROFLLT = (RMSE(u_rof_llt(:),Im(:))); % error against the noise-free image Im
+fprintf('%s %f \n', 'RMSE error for ROF-LLT is:', rmseROFLLT);
+figure; imshow(u_rof_llt, [0 1]); title('ROF-LLT denoised image (CPU)');
+%%
+% fprintf('Denoise using the ROF-LLT model (GPU) \n');
+% lambda_ROF = lambda_reg; % ROF regularisation parameter
+% lambda_LLT = lambda_reg*0.45; % LLT regularisation parameter
+% iter_LLT = 500; % iterations 
+% tau_rof_llt = 0.0025; % time-marching constant 
+% tic; u_rof_llt_g = LLT_ROF_GPU(single(u0), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
+% rmseROFLLT_g = (RMSE(u_rof_llt_g(:),Im(:)));
+% fprintf('%s %f \n', 'RMSE error for ROF-LLT is:', rmseROFLLT_g);
+% figure; imshow(u_rof_llt_g, [0 1]); title('ROF-LLT denoised image (GPU)');
+%%
+fprintf('Denoise using Nonlinear-Diffusion model (CPU) \n');
+iter_diff = 800; % number of diffusion iterations
+lambda_regDiff = 0.025; % regularisation for the diffusivity 
+sigmaPar = 0.015; % edge-preserving parameter
+tau_param = 0.025; % time-marching constant 
+tic; u_diff = NonlDiff(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
+rmseDiffus = (RMSE(u_diff(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for Nonlinear Diffusion is:', rmseDiffus);
+figure; imshow(u_diff, [0 1]); title('Diffusion denoised image (CPU)');
+%%
+% fprintf('Denoise using Nonlinear-Diffusion model (GPU) \n');
+% iter_diff = 800; % number of diffusion iterations
+% lambda_regDiff = 0.025; % regularisation for the diffusivity 
+% sigmaPar = 0.015; % edge-preserving parameter
+% tau_param = 0.025; % time-marching constant 
+% tic; u_diff_g = NonlDiff_GPU(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
+% figure; imshow(u_diff_g, [0 1]); title('Diffusion denoised image (GPU)');
+%%
+fprintf('Denoise using Fourth-order anisotropic diffusion model (CPU) \n');
+iter_diff = 800; % number of diffusion iterations
+lambda_regDiff = 3.5; % regularisation for the diffusivity 
+sigmaPar = 0.02; % edge-preserving parameter
+tau_param = 0.0015; % time-marching constant 
+tic; u_diff4 = Diffusion_4thO(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
+rmseDiffHO = (RMSE(u_diff4(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for Fourth-order anisotropic diffusion is:', rmseDiffHO);
+figure; imshow(u_diff4, [0 1]); title('Diffusion 4thO denoised image (CPU)');
+%%
+% fprintf('Denoise using Fourth-order anisotropic diffusion model (GPU) \n');
+% iter_diff = 800; % number of diffusion iterations
+% lambda_regDiff = 3.5; % regularisation for the diffusivity 
+% sigmaPar = 0.02; % edge-preserving parameter
+% tau_param = 0.0015; % time-marching constant 
+% tic; u_diff4_g = Diffusion_4thO_GPU(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
+% figure; imshow(u_diff4_g, [0 1]); title('Diffusion 4thO denoised image (GPU)');
+%%
+fprintf('Weights pre-calculation for Non-local TV (takes time on CPU) \n');
+SearchingWindow = 7;
+PatchWindow = 2;
+NeighboursNumber = 20; % the number of neibours to include
+h = 0.23; % edge related parameter for NLM
+tic; [H_i, H_j, Weights] = PatchSelect(single(u0), SearchingWindow, PatchWindow, NeighboursNumber, h); toc;
+%%
+fprintf('Denoise using Non-local Total Variation (CPU) \n');
+iter_nltv = 3; % number of nltv iterations
+lambda_nltv = 0.05; % regularisation parameter for nltv
+tic; u_nltv = Nonlocal_TV(single(u0), H_i, H_j, 0, Weights, lambda_nltv, iter_nltv); toc; 
+rmse_nltv = (RMSE(u_nltv(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for Non-local Total Variation is:', rmse_nltv);
+figure; imagesc(u_nltv, [0 1]); colormap(gray); daspect([1 1 1]); title('Non-local Total Variation denoised image (CPU)');
+%%
+%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< %
+
+fprintf('Denoise using the FGP-dTV model (CPU) \n');
+% create another image (reference) with slightly less amount of noise
+u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0;
+% u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
+
+iter_fgp = 1000; % number of FGP iterations
+epsil_tol =  1.0e-06; % tolerance
+eta =  0.2; % Reference image gradient smoothing constant
+tic; u_fgp_dtv = FGP_dTV(single(u0), single(u_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
+rmse_dTV= (RMSE(u_fgp_dtv(:),Im(:)));
+fprintf('%s %f \n', 'RMSE error for Directional Total Variation (dTV) is:', rmse_dTV);
+figure; imshow(u_fgp_dtv, [0 1]); title('FGP-dTV denoised image (CPU)');
+%%
+% fprintf('Denoise using the FGP-dTV model (GPU) \n');
+% % create another image (reference) with slightly less amount of noise
+% u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0;
+% % u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
+% 
+% iter_fgp = 1000; % number of FGP iterations
+% epsil_tol =  1.0e-06; % tolerance
+% eta =  0.2; % Reference image gradient smoothing constant
+% tic; u_fgp_dtvG = FGP_dTV_GPU(single(u0), single(u_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
+% figure; imshow(u_fgp_dtvG, [0 1]); title('FGP-dTV denoised image (GPU)');
+%%
+fprintf('Denoise using the TNV prior (CPU) \n');
+slices = 5; N = 512;
+vol3D = zeros(N,N,slices, 'single');
+for i = 1:slices
+vol3D(:,:,i) = Im + .05*randn(size(Im)); 
+end
+vol3D(vol3D < 0) = 0;
+
+iter_tnv = 200; % number of TNV iterations
+tic; u_tnv = TNV(single(vol3D), lambda_reg, iter_tnv); toc; 
+figure; imshow(u_tnv(:,:,3), [0 1]); title('TNV denoised stack of channels (CPU)');
diff --git a/demos/demoMatlab_inpaint.m b/demos/demoMatlab_inpaint.m
new file mode 100644
index 0000000..a85f2b9
--- /dev/null
+++ b/demos/demoMatlab_inpaint.m
@@ -0,0 +1,35 @@
+% Image (2D) inpainting demo using CCPi-RGL
+clear; close all
+Path1 = fullfile('..', 'src', 'Matlab', 'mex_compile', 'installed'); % fullfile: no sprintf escape parsing of '\' on Windows
+Path2 = 'data';
+addpath(Path1);
+addpath(Path2);
+
+load('SinoInpaint.mat'); % the variables Sinogram and Mask used below are expected to come from this file
+Sinogram = Sinogram./max(Sinogram(:)); % scale so that the maximum value is 1
+Sino_mask = Sinogram.*(1-single(Mask)); % zero the sinogram wherever Mask equals 1
+figure; 
+subplot(1,2,1); imshow(Sino_mask, [0 1]); title('Missing data sinogram');
+subplot(1,2,2); imshow(Mask, [0 1]); title('Mask');
+%%
+fprintf('Inpaint using Linear-Diffusion model (CPU) \n');
+iter_diff = 5000; % number of diffusion iterations
+lambda_regDiff = 6000; % regularisation for the diffusivity 
+sigmaPar = 0.0; % edge-preserving parameter
+tau_param = 0.000075; % time-marching constant 
+tic; u_diff = NonlDiff_Inp(single(Sino_mask), Mask, lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
+figure; imshow(u_diff, [0 1]); title('Linear-Diffusion inpainted sinogram (CPU)');
+%%
+fprintf('Inpaint using Nonlinear-Diffusion model (CPU) \n');
+iter_diff = 1500; % number of diffusion iterations
+lambda_regDiff = 80; % regularisation for the diffusivity 
+sigmaPar = 0.00009; % edge-preserving parameter
+tau_param = 0.000008; % time-marching constant 
+tic; u_diff = NonlDiff_Inp(single(Sino_mask), Mask, lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
+figure; imshow(u_diff, [0 1]); title('Non-Linear Diffusion inpainted sinogram (CPU)');
+%%
+fprintf('Inpaint using Nonlocal Vertical Marching model (CPU) \n');
+Increment = 1; % linear increment for the searching window
+tic; [u_nom,maskupd] = NonlocalMarching_Inpaint(single(Sino_mask), Mask, Increment); toc;
+figure; imshow(u_nom, [0 1]); title('NVM inpainted sinogram (CPU)');
+%%
\ No newline at end of file
diff --git a/demos/demo_cpu_inpainters.py b/demos/demo_cpu_inpainters.py
new file mode 100644
index 0000000..d07e74a
--- /dev/null
+++ b/demos/demo_cpu_inpainters.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Demonstration of CPU inpainters
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import timeit
+from scipy import io
+from ccpi.filters.regularisers import NDF_INP, NVM_INP
+from qualitymetrics import rmse
+###############################################################################
+def printParametersToString(pars):
+    """Format the `pars` dict as text, one 'key = value' line per entry."""
+    rows = []
+    for name, item in pars.items():
+        if name == 'algorithm':
+            shown = item.__name__  # print the callable's name, not its repr
+        elif name in ('input', 'maskData'):
+            shown = np.shape(item)  # print only the array's shape
+        else:
+            shown = item
+        # same 'key = value' layout for every entry, newline-terminated
+        rows.append("{0} = {1}\n".format(name, shown))
+    return ''.join(rows)
+###############################################################################
+
+# read sinogram and the mask
+filename = os.path.join("data" ,"SinoInpaint.mat")
+sino = io.loadmat(filename)
+sino_full = sino.get('Sinogram')
+Mask = sino.get('Mask')
+[angles_dim,detectors_dim] = sino_full.shape
+sino_full = sino_full/np.max(sino_full)
+#apply mask to sinogram
+sino_cut = sino_full*(1-Mask)
+#sino_cut_new = np.zeros((angles_dim,detectors_dim),'float32')
+#sino_cut_new = sino_cut.copy(order='c')
+#sino_cut_new[:] = sino_cut[:]
+sino_cut_new = np.ascontiguousarray(sino_cut, dtype=np.float32);
+#mask = np.zeros((angles_dim,detectors_dim),'uint8')
+#mask =Mask.copy(order='c')
+#mask[:] = Mask[:]
+mask = np.ascontiguousarray(Mask, dtype=np.uint8);
+
+plt.figure(1)
+plt.subplot(121)
+plt.imshow(sino_cut_new,vmin=0.0, vmax=1)
+plt.title('Missing Data sinogram')
+plt.subplot(122)
+plt.imshow(mask)
+plt.title('Mask')
+plt.show()
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Inpainting using linear diffusion (2D)__")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(2)
+plt.suptitle('Performance of linear inpainting using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Missing data sinogram')
+imgplot = plt.imshow(sino_cut_new,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF_INP, \
+        'input' : sino_cut_new,\
+        'maskData' : mask,\
+        'regularisation_parameter':5000,\
+        'edge_parameter':0,\
+        'number_of_iterations' :5000 ,\
+        'time_marching_parameter':0.000075,\
+        'penalty_type':0
+        }
+        
+start_time = timeit.default_timer()
+ndf_inp_linear = NDF_INP(pars['input'],
+              pars['maskData'],
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'])
+             
+rms = rmse(sino_full, ndf_inp_linear)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_inp_linear, cmap="gray")
+plt.title('{}'.format('Linear diffusion inpainting results'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_Inpainting using nonlinear diffusion (2D)_")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(3)
+plt.suptitle('Performance of nonlinear diffusion inpainting using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Missing data sinogram')
+imgplot = plt.imshow(sino_cut_new,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF_INP, \
+        'input' : sino_cut_new,\
+        'maskData' : mask,\
+        'regularisation_parameter':80,\
+        'edge_parameter':0.00009,\
+        'number_of_iterations' :1500 ,\
+        'time_marching_parameter':0.000008,\
+        'penalty_type':1
+        }
+        
+start_time = timeit.default_timer()
+ndf_inp_nonlinear = NDF_INP(pars['input'],
+              pars['maskData'],
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'])
+             
+rms = rmse(sino_full, ndf_inp_nonlinear)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_inp_nonlinear, cmap="gray")
+plt.title('{}'.format('Nonlinear diffusion inpainting results'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("Inpainting using nonlocal vertical marching")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(4)
+plt.suptitle('Performance of NVM inpainting using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Missing data sinogram')
+imgplot = plt.imshow(sino_cut,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NVM_INP, \
+        'input' : sino_cut_new,\
+        'maskData' : mask,\
+        'SW_increment': 1,\
+        'number_of_iterations' : 150
+        }
+        
+start_time = timeit.default_timer()
+(nvm_inp, mask_upd) = NVM_INP(pars['input'],
+              pars['maskData'],
+              pars['SW_increment'],
+              pars['number_of_iterations'])
+             
+rms = rmse(sino_full, nvm_inp)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(nvm_inp, cmap="gray")
+plt.title('{}'.format('Nonlocal Vertical Marching inpainting results'))
+#%%
diff --git a/demos/demo_cpu_regularisers.py b/demos/demo_cpu_regularisers.py
new file mode 100644
index 0000000..373502b
--- /dev/null
+++ b/demos/demo_cpu_regularisers.py
@@ -0,0 +1,572 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 22 11:39:43 2018
+
+Demonstration of CPU regularisers 
+
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import timeit
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, TNV, NDF, Diff4th
+from ccpi.filters.regularisers import PatchSelect, NLTV
+from qualitymetrics import rmse
+###############################################################################
+def printParametersToString(pars):
+    """Format the `pars` dict as text, one 'key = value' line per entry."""
+    rows = []
+    for name, item in pars.items():
+        if name == 'algorithm':
+            shown = item.__name__  # print the callable's name, not its repr
+        elif name in ('input', 'refdata'):
+            shown = np.shape(item)  # print only the array's shape
+        else:
+            shown = item
+        # same 'key = value' layout for every entry, newline-terminated
+        rows.append("{0} = {1}\n".format(name, shown))
+    return ''.join(rows)
+###############################################################################
+#%%
+filename = os.path.join( "data" ,"lena_gray_512.tif")
+
+# read image
+Im = plt.imread(filename)
+Im = np.asarray(Im, dtype='float32')
+
+Im = Im/255.0
+perc = 0.05
+u0 = Im + np.random.normal(loc = 0 ,
+                                  scale = perc * Im , 
+                                  size = np.shape(Im))
+u_ref = Im + np.random.normal(loc = 0 ,
+                                  scale = 0.01 * Im , 
+                                  size = np.shape(Im))
+(N,M) = np.shape(u0)
+# map the u0 u0->u0>0
+# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+u0 = u0.astype('float32')
+u_ref = u_ref.astype('float32')
+
+# change dims to check that modules work with non-squared images
+"""
+M = M-100
+u_ref2 = np.zeros([N,M],dtype='float32')
+u_ref2[:,0:M] = u_ref[:,0:M]
+u_ref = u_ref2
+del u_ref2
+
+u02 = np.zeros([N,M],dtype='float32')
+u02[:,0:M] = u0[:,0:M]
+u0 = u02
+del u02
+
+Im2 = np.zeros([N,M],dtype='float32')
+Im2[:,0:M] = Im[:,0:M]
+Im = Im2
+del Im2
+"""
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________ROF-TV (2D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of ROF-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm': ROF_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04,\
+        'number_of_iterations': 1200,\
+        'time_marching_parameter': 0.0025        
+        }
+print ("#############ROF TV CPU####################")
+start_time = timeit.default_timer()
+rof_cpu = ROF_TV(pars['input'],
+             pars['regularisation_parameter'],
+             pars['number_of_iterations'],
+             pars['time_marching_parameter'],'cpu')
+rms = rmse(Im, rof_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________FGP-TV (2D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of FGP-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :2000 ,\
+        'tolerance_constant':1e-06,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP TV CPU####################")
+start_time = timeit.default_timer()
+fgp_cpu = FGP_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')  
+             
+             
+rms = rmse(Im, fgp_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________SB-TV (2D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of SB-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :150 ,\
+        'tolerance_constant':1e-06,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############SB TV CPU####################")
+start_time = timeit.default_timer()
+sb_cpu = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'cpu')  
+             
+             
+rms = rmse(Im, sb_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+#%%
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_____Total Generalised Variation (2D)______")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of TGV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'alpha1':1.0,\
+        'alpha0':2.0,\
+        'number_of_iterations' :1350 ,\
+        'LipshitzConstant' :12 ,\
+        }
+        
+print ("#############TGV CPU####################")
+start_time = timeit.default_timer()
+tgv_cpu = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'cpu')
+             
+             
+rms = rmse(Im, tgv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("______________LLT- ROF (2D)________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of LLT-ROF regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : LLT_ROF, \
+        'input' : u0,\
+        'regularisation_parameterROF':0.04, \
+        'regularisation_parameterLLT':0.01, \
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter' :0.0025 ,\
+        }
+        
+print ("#############LLT- ROF CPU####################")
+start_time = timeit.default_timer()
+lltrof_cpu = LLT_ROF(pars['input'], 
+              pars['regularisation_parameterROF'],
+              pars['regularisation_parameterLLT'],
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'cpu')
+
+rms = rmse(Im, lltrof_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(lltrof_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("________________NDF (2D)___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of NDF regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF, \
+        'input' : u0,\
+        'regularisation_parameter':0.025, \
+        'edge_parameter':0.015,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.025,\
+        'penalty_type':1
+        }
+        
+print ("#############NDF CPU################")
+start_time = timeit.default_timer()
+ndf_cpu = NDF(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'],'cpu')  
+             
+rms = rmse(Im, ndf_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Anisotropic Diffusion 4th Order (2D)____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of Diff4th regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : Diff4th, \
+        'input' : u0,\
+        'regularisation_parameter':3.5, \
+        'edge_parameter':0.02,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.0015
+        }
+        
+print ("#############Diff4th CPU################")
+start_time = timeit.default_timer()
+diff4_cpu = Diff4th(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'cpu')
+             
+rms = rmse(Im, diff4_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(diff4_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Nonlocal patches pre-calculation____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+start_time = timeit.default_timer()
+# set parameters
+pars = {'algorithm' : PatchSelect, \
+        'input' : u0,\
+        'searchwindow': 7, \
+        'patchwindow': 2,\
+        'neighbours' : 15 ,\
+        'edge_parameter':0.18}
+
+H_i, H_j, Weights = PatchSelect(pars['input'], 
+              pars['searchwindow'],
+              pars['patchwindow'], 
+              pars['neighbours'],
+              pars['edge_parameter'],'cpu')
+              
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+"""
+plt.figure()
+plt.imshow(Weights[0,:,:],cmap="gray",interpolation="nearest",vmin=0, vmax=1)
+plt.show()
+"""
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Nonlocal Total Variation penalty____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of NLTV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+pars2 = {'algorithm' : NLTV, \
+        'input' : u0,\
+        'H_i': H_i, \
+        'H_j': H_j,\
+        'H_k' : 0,\
+        'Weights' : Weights,\
+        'regularisation_parameter': 0.04,\
+        'iterations': 3
+        }
+start_time = timeit.default_timer()
+nltv_cpu = NLTV(pars2['input'], 
+              pars2['H_i'],
+              pars2['H_j'], 
+              pars2['H_k'],
+              pars2['Weights'],
+              pars2['regularisation_parameter'],
+              pars2['iterations'])
+
+rms = rmse(Im, nltv_cpu)
+pars2['rmse'] = rms  # store on the NLTV settings (pars still holds PatchSelect's)
+# report the NLTV settings; the H_i/H_j/Weights arrays are left out of the text
+txtstr = printParametersToString({k: v for k, v in pars2.items() if k not in ('H_i', 'H_j', 'Weights')})
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(nltv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_____________FGP-dTV (2D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of FGP-dTV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV, \
+        'input' : u0,\
+        'refdata' : u_ref,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :2000 ,\
+        'tolerance_constant':1e-06,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP dTV CPU####################")
+start_time = timeit.default_timer()
+fgp_dtv_cpu = FGP_dTV(pars['input'], 
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')
+             
+rms = rmse(Im, fgp_dtv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dtv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("__________Total nuclear Variation__________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of TNV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+channelsNo = 5
+noisyVol = np.zeros((channelsNo,N,M),dtype='float32')
+idealVol = np.zeros((channelsNo,N,M),dtype='float32')
+
+for i in range (channelsNo):
+    noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
+    idealVol[i,:,:] = Im
+
+# set parameters
+pars = {'algorithm' : TNV, \
+        'input' : noisyVol,\
+        'regularisation_parameter': 0.04, \
+        'number_of_iterations' : 200 ,\
+        'tolerance_constant':1e-05
+        }
+        
+print ("#############TNV CPU#################")
+start_time = timeit.default_timer()
+tnv_cpu = TNV(pars['input'],           
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'])
+             
+rms = rmse(idealVol, tnv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tnv_cpu[3,:,:], cmap="gray")
+plt.title('{}'.format('CPU results'))
diff --git a/demos/demo_cpu_regularisers3D.py b/demos/demo_cpu_regularisers3D.py
new file mode 100644
index 0000000..56baf13
--- /dev/null
+++ b/demos/demo_cpu_regularisers3D.py
@@ -0,0 +1,458 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 22 11:39:43 2018
+
+Demonstration of 3D CPU regularisers 
+
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import timeit
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
+from qualitymetrics import rmse
+###############################################################################
+def printParametersToString(pars):
+    """Format pars as text, one "key = value" line per entry.
+
+    'algorithm' shows its __name__; 'input'/'refdata' show array shape.
+    """
+    txt = r''
+    for key, value in pars.items():
+        if key == 'algorithm':
+            value = value.__name__
+        elif key in ('input', 'refdata'):
+            value = np.shape(value)
+        txt += "{0} = {1}\n".format(key, value)
+    return txt
+###############################################################################
+#%%
+filename = os.path.join( "data" ,"lena_gray_512.tif")
+
+# read image
+Im = plt.imread(filename)
+Im = np.asarray(Im, dtype='float32')
+
+Im = Im/255
+perc = 0.05
+u0 = Im + np.random.normal(loc = 0 ,
+                                  scale = perc * Im , 
+                                  size = np.shape(Im))
+u_ref = Im + np.random.normal(loc = 0 ,
+                                  scale = 0.01 * Im , 
+                                  size = np.shape(Im))
+(N,M) = np.shape(u0)
+# (disabled) clamp negative values of u0 to zero:
+# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+u0 = u0.astype('float32')
+u_ref = u_ref.astype('float32')
+
+# change dims to check that modules work with non-square images
+"""
+M = M-100
+u_ref2 = np.zeros([N,M],dtype='float32')
+u_ref2[:,0:M] = u_ref[:,0:M]
+u_ref = u_ref2
+del u_ref2
+
+u02 = np.zeros([N,M],dtype='float32')
+u02[:,0:M] = u0[:,0:M]
+u0 = u02
+del u02
+
+Im2 = np.zeros([N,M],dtype='float32')
+Im2[:,0:M] = Im[:,0:M]
+Im = Im2
+del Im2
+"""
+slices = 15
+
+noisyVol = np.zeros((slices,N,M),dtype='float32')
+noisyRef = np.zeros((slices,N,M),dtype='float32')
+idealVol = np.zeros((slices,N,M),dtype='float32')
+
+for i in range (slices):
+    noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
+    noisyRef[i,:,:] = Im + np.random.normal(loc = 0 , scale = 0.01 * Im , size = np.shape(Im))
+    idealVol[i,:,:] = Im
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________ROF-TV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot: the left panel shows the noisy input slice next to the result
+fig = plt.figure()
+plt.suptitle('Performance of ROF-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy 11th slice of a volume')  # index 10 is the 11th slice
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm': ROF_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04,\
+        'number_of_iterations': 500,\
+        'time_marching_parameter': 0.0025
+        }
+print ("#############ROF TV CPU####################")
+start_time = timeit.default_timer()
+rof_cpu3D = ROF_TV(pars['input'],
+             pars['regularisation_parameter'],
+             pars['number_of_iterations'],
+             pars['time_marching_parameter'],'cpu')
+rms = rmse(idealVol, rof_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using ROF-TV'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________FGP-TV (3D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of FGP-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :300 ,\
+        'tolerance_constant':0.00001,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP TV CPU####################")
+start_time = timeit.default_timer()
+fgp_cpu3D = FGP_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')  
+             
+             
+rms = rmse(idealVol, fgp_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using FGP-TV'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________SB-TV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of SB-TV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :150 ,\
+        'tolerance_constant':0.00001,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############SB TV CPU####################")
+start_time = timeit.default_timer()
+sb_cpu3D = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'cpu')
+             
+rms = rmse(idealVol, sb_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using SB-TV'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________LLT-ROF (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of LLT-ROF regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : LLT_ROF, \
+        'input' : noisyVol,\
+        'regularisation_parameterROF':0.04, \
+        'regularisation_parameterLLT':0.015, \
+        'number_of_iterations' :300 ,\
+        'time_marching_parameter' :0.0025 ,\
+        }
+
+print ("#############LLT ROF CPU####################")
+start_time = timeit.default_timer()
+lltrof_cpu3D = LLT_ROF(pars['input'], 
+              pars['regularisation_parameterROF'],
+              pars['regularisation_parameterLLT'],
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'cpu')
+
+rms = rmse(idealVol, lltrof_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(lltrof_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using LLT-ROF'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________TGV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of TGV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'alpha1':1.0,\
+        'alpha0':2.0,\
+        'number_of_iterations' :250 ,\
+        'LipshitzConstant' :12 ,\
+        }
+
+print ("#############TGV CPU####################")
+start_time = timeit.default_timer()
+tgv_cpu3D = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'cpu')
+             
+
+rms = rmse(idealVol, tgv_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using TGV'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("________________NDF (3D)___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of NDF regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy volume')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.025, \
+        'edge_parameter':0.015,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.025,\
+        'penalty_type':  1
+        }
+        
+print ("#############NDF CPU################")
+start_time = timeit.default_timer()
+ndf_cpu3D = NDF(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'])  
+             
+rms = rmse(idealVol, ndf_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using NDF iterations'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Anisotropic Diffusion 4th Order (3D)____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of Diff4th regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy volume')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : Diff4th, \
+        'input' : noisyVol,\
+        'regularisation_parameter':3.5, \
+        'edge_parameter':0.02,\
+        'number_of_iterations' :300 ,\
+        'time_marching_parameter':0.0015
+        }
+        
+print ("#############Diff4th CPU################")
+start_time = timeit.default_timer()
+diff4th_cpu3D = Diff4th(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'cpu')  # run explicitly on the CPU
+             
+rms = rmse(idealVol, diff4th_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(diff4th_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using DIFF4th iterations'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________FGP-dTV (3D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of FGP-dTV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV,\
+        'input' : noisyVol,\
+        'refdata' : noisyRef,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :300 ,\
+        'tolerance_constant':0.00001,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP dTV CPU####################")
+start_time = timeit.default_timer()
+fgp_dTV_cpu3D = FGP_dTV(pars['input'],
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'],
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')
+             
+             
+rms = rmse(idealVol, fgp_dTV_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dTV_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using FGP-dTV'))
+#%%
diff --git a/demos/demo_cpu_vs_gpu_regularisers.py b/demos/demo_cpu_vs_gpu_regularisers.py
new file mode 100644
index 0000000..5ce8da4
--- /dev/null
+++ b/demos/demo_cpu_vs_gpu_regularisers.py
@@ -0,0 +1,790 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 22 11:39:43 2018
+
+Demonstration of CPU implementation against the GPU one
+
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import timeit
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
+from ccpi.filters.regularisers import PatchSelect
+from qualitymetrics import rmse
+###############################################################################
+def printParametersToString(pars):
+    """Format pars as text, one "key = value" line per entry.
+
+    'algorithm' shows its __name__; 'input'/'refdata' show array shape.
+    """
+    txt = r''
+    for key, value in pars.items():
+        if key == 'algorithm':
+            value = value.__name__
+        elif key in ('input', 'refdata'):
+            value = np.shape(value)
+        txt += "{0} = {1}\n".format(key, value)
+    return txt
+###############################################################################
+
+filename = os.path.join("data" ,"lena_gray_512.tif")
+
+# read image
+Im = plt.imread(filename)                     
+Im = np.asarray(Im, dtype='float32')
+
+Im = Im/255
+perc = 0.05
+u0 = Im + np.random.normal(loc = 0 ,
+                                  scale = perc * Im , 
+                                  size = np.shape(Im))
+u_ref = Im + np.random.normal(loc = 0 ,
+                                  scale = 0.01 * Im , 
+                                  size = np.shape(Im))
+
+# (disabled) clamp negative values of u0 to zero:
+# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+u0 = u0.astype('float32')
+u_ref = u_ref.astype('float32')
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________ROF-TV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of ROF-TV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm': ROF_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04,\
+        'number_of_iterations': 4500,\
+        'time_marching_parameter': 0.00002
+        }
+print ("#############ROF TV CPU####################")
+start_time = timeit.default_timer()
+rof_cpu = ROF_TV(pars['input'],
+             pars['regularisation_parameter'],
+             pars['number_of_iterations'],
+             pars['time_marching_parameter'],'cpu')
+rms = rmse(Im, rof_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+print ("##############ROF TV GPU##################")
+start_time = timeit.default_timer()
+rof_gpu = ROF_TV(pars['input'], 
+                     pars['regularisation_parameter'],
+                     pars['number_of_iterations'], 
+                     pars['time_marching_parameter'],'gpu')
+                     
+rms = rmse(Im, rof_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = ROF_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+# mismatch mask: 1 where |CPU - GPU| > tolerance, else 0; in-tolerance residuals
+# are zeroed so they cannot accumulate over the image into a false mismatch
+diff_im = (np.abs(rof_cpu - rof_gpu) > tolerance).astype('float32')
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if (diff_im.sum() > 1):
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________FGP-TV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of FGP-TV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :1200 ,\
+        'tolerance_constant':0.00001,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP TV CPU####################")
+start_time = timeit.default_timer()
+fgp_cpu = FGP_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')  
+             
+             
+rms = rmse(Im, fgp_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+
+print ("##############FGP TV GPU##################")
+start_time = timeit.default_timer()
+fgp_gpu = FGP_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+                                   
+rms = rmse(Im, fgp_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = FGP_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+# mismatch mask: 1 where |CPU - GPU| > tolerance, else 0; in-tolerance residuals
+# are zeroed so they cannot accumulate over the image into a false mismatch
+diff_im = (np.abs(fgp_cpu - fgp_gpu) > tolerance).astype('float32')
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if (diff_im.sum() > 1):
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________SB-TV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of SB-TV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :150 ,\
+        'tolerance_constant':1e-05,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############SB-TV CPU####################")
+start_time = timeit.default_timer()
+sb_cpu = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'cpu')  
+             
+             
+rms = rmse(Im, sb_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+
+print ("##############SB TV GPU##################")
+start_time = timeit.default_timer()
+sb_gpu = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'gpu')
+                                   
+rms = rmse(Im, sb_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = SB_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place the text box at (0.15, 0.25) in axes coords, i.e. in the lower-left area
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+# mismatch mask: 1 where |CPU - GPU| > tolerance, else 0; in-tolerance residuals
+# are zeroed so they cannot accumulate over the image into a false mismatch
+diff_im = (np.abs(sb_cpu - sb_gpu) > tolerance).astype('float32')
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if (diff_im.sum() > 1):
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________TGV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of TGV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'alpha1':1.0,\
+        'alpha0':2.0,\
+        'number_of_iterations' :400 ,\
+        'LipshitzConstant' :12 ,\
+        }
+        
+print ("#############TGV CPU####################")
+start_time = timeit.default_timer()
+tgv_cpu = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'cpu')
+             
+rms = rmse(Im, tgv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+print ("##############TGV GPU##################")
+start_time = timeit.default_timer()
+tgv_gpu = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'gpu')
+                                   
+rms = rmse(Im, tgv_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = TGV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+print ("--------Compare the results--------")
+tolerance = 1e-05  # largest per-pixel CPU/GPU difference still counted as equal
+diff_im = abs(tgv_cpu - tgv_gpu)
+mismatch = diff_im > tolerance  # boolean mask of pixels outside the tolerance
+diff_im[mismatch] = 1  # saturate mismatching pixels so they are drawn white
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if mismatch.any():  # decide on the mask; a sum would mix in sub-tolerance noise
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________LLT-ROF bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of LLT-ROF regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : LLT_ROF, \
+        'input' : u0,\
+        'regularisation_parameterROF':0.04, \
+        'regularisation_parameterLLT':0.01, \
+        'number_of_iterations' :4500 ,\
+        'time_marching_parameter' :0.00002 ,\
+        }
+        
+print ("#############LLT- ROF CPU####################")
+start_time = timeit.default_timer()
+lltrof_cpu = LLT_ROF(pars['input'], 
+              pars['regularisation_parameterROF'],
+              pars['regularisation_parameterLLT'],
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'cpu')
+
+rms = rmse(Im, lltrof_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(lltrof_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+print ("#############LLT- ROF GPU####################")
+start_time = timeit.default_timer()
+lltrof_gpu = LLT_ROF(pars['input'], 
+              pars['regularisation_parameterROF'],
+              pars['regularisation_parameterLLT'],
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'gpu')
+
+rms = rmse(Im, lltrof_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = LLT_ROF
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(lltrof_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+print ("--------Compare the results--------")
+tolerance = 1e-05  # largest per-pixel CPU/GPU difference still counted as equal
+diff_im = abs(lltrof_cpu - lltrof_gpu)
+mismatch = diff_im > tolerance  # boolean mask of pixels outside the tolerance
+diff_im[mismatch] = 1  # saturate mismatching pixels so they are drawn white
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if mismatch.any():  # decide on the mask; a sum would mix in sub-tolerance noise
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________NDF bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of NDF regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF, \
+        'input' : u0,\
+        'regularisation_parameter':0.06, \
+        'edge_parameter':0.04,\
+        'number_of_iterations' :1000 ,\
+        'time_marching_parameter':0.025,\
+        'penalty_type':  1
+        }
+        
+print ("#############NDF CPU####################")
+start_time = timeit.default_timer()
+ndf_cpu = NDF(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'],'cpu')
+             
+rms = rmse(Im, ndf_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+
+print ("##############NDF GPU##################")
+start_time = timeit.default_timer()
+ndf_gpu = NDF(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'],'gpu')
+             
+rms = rmse(Im, ndf_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = NDF
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+print ("--------Compare the results--------")
+tolerance = 1e-05  # largest per-pixel CPU/GPU difference still counted as equal
+diff_im = abs(ndf_cpu - ndf_gpu)
+mismatch = diff_im > tolerance  # boolean mask of pixels outside the tolerance
+diff_im[mismatch] = 1  # saturate mismatching pixels so they are drawn white
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if mismatch.any():  # decide on the mask; a sum would mix in sub-tolerance noise
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Anisotropic Diffusion 4th Order (2D)____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of Diff4th regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : Diff4th, \
+        'input' : u0,\
+        'regularisation_parameter':3.5, \
+        'edge_parameter':0.02,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.001
+        }
+
+print ("#############Diff4th CPU####################")
+start_time = timeit.default_timer()
+diff4th_cpu = Diff4th(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'cpu')
+             
+rms = rmse(Im, diff4th_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(diff4th_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+print ("##############Diff4th GPU##################")
+start_time = timeit.default_timer()
+diff4th_gpu = Diff4th(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 'gpu')
+             
+rms = rmse(Im, diff4th_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = Diff4th
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(diff4th_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+print ("--------Compare the results--------")
+tolerance = 1e-05  # largest per-pixel CPU/GPU difference still counted as equal
+diff_im = abs(diff4th_cpu - diff4th_gpu)
+mismatch = diff_im > tolerance  # boolean mask of pixels outside the tolerance
+diff_im[mismatch] = 1  # saturate mismatching pixels so they are drawn white
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if mismatch.any():  # decide on the mask; a sum would mix in sub-tolerance noise
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________FGP-dTV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of FGP-dTV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV, \
+        'input' : u0,\
+        'refdata' : u_ref,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :1000 ,\
+        'tolerance_constant':1e-07,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP dTV CPU####################")
+start_time = timeit.default_timer()
+fgp_dtv_cpu = FGP_dTV(pars['input'], 
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')
+             
+             
+rms = rmse(Im, fgp_dtv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dtv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+print ("##############FGP dTV GPU##################")
+start_time = timeit.default_timer()
+fgp_dtv_gpu = FGP_dTV(pars['input'], 
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+rms = rmse(Im, fgp_dtv_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = FGP_dTV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dtv_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+
+print ("--------Compare the results--------")
+tolerance = 1e-05  # largest per-pixel CPU/GPU difference still counted as equal
+diff_im = abs(fgp_dtv_cpu - fgp_dtv_gpu)
+mismatch = diff_im > tolerance  # boolean mask of pixels outside the tolerance
+diff_im[mismatch] = 1  # saturate mismatching pixels so they are drawn white
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if mismatch.any():  # decide on the mask; a sum would mix in sub-tolerance noise
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____Non-local regularisation bench_________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Comparison of Nonlocal TV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+pars = {'algorithm' : PatchSelect, \
+        'input' : u0,\
+        'searchwindow': 7, \
+        'patchwindow': 2,\
+        'neighbours' : 15 ,\
+        'edge_parameter':0.18}
+
+print ("############## Nonlocal Patches on CPU##################")
+start_time = timeit.default_timer()
+H_i, H_j, WeightsCPU = PatchSelect(pars['input'], 
+              pars['searchwindow'],
+              pars['patchwindow'], 
+              pars['neighbours'],
+              pars['edge_parameter'],'cpu')
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+
+print ("############## Nonlocal Patches on GPU##################")
+# restart the timer so the elapsed time below covers only the GPU run
+start_time = timeit.default_timer()
+H_i, H_j, WeightsGPU = PatchSelect(pars['input'], 
+              pars['searchwindow'],
+              pars['patchwindow'], 
+              pars['neighbours'],
+              pars['edge_parameter'],'gpu')
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+
+print ("--------Compare the results--------")
+tolerance = 1e-05  # largest per-pixel CPU/GPU difference still counted as equal
+diff_im = abs(WeightsCPU[0,:,:] - WeightsGPU[0,:,:])
+mismatch = diff_im > tolerance  # boolean mask of pixels outside the tolerance
+diff_im[mismatch] = 1  # saturate mismatching pixels so they are drawn white
+a=fig.add_subplot(1,2,2)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if mismatch.any():  # decide on the mask; a sum would mix in sub-tolerance noise
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
+#%%
\ No newline at end of file
diff --git a/demos/demo_gpu_regularisers.py b/demos/demo_gpu_regularisers.py
new file mode 100644
index 0000000..bc9baf2
--- /dev/null
+++ b/demos/demo_gpu_regularisers.py
@@ -0,0 +1,518 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 22 11:39:43 2018
+
+Demonstration of GPU regularisers
+
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import timeit
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
+from ccpi.filters.regularisers import PatchSelect, NLTV
+from qualitymetrics import rmse
+###############################################################################
+def printParametersToString(pars):
+        """Return the entries of pars as newline-terminated 'key = value' lines."""
+        lines = []
+        for name, val in pars.items():
+            if name == 'algorithm':
+                shown = val.__name__  # callables are reported by their name
+            elif name in ('input', 'refdata'):
+                shown = np.shape(val)  # arrays are reported by their shape only
+            else:
+                shown = val
+            lines.append("{0} = {1}".format(name, shown))
+        # every entry, including the last one, ends with a newline
+        return ''.join(line + '\n' for line in lines)
+###############################################################################
+#%%
+filename = os.path.join( "data" ,"lena_gray_512.tif")
+
+# read image
+Im = plt.imread(filename)                     
+Im = np.asarray(Im, dtype='float32')
+
+Im = Im/255
+perc = 0.05
+u0 = Im + np.random.normal(loc = 0 ,
+                                  scale = perc * Im , 
+                                  size = np.shape(Im))
+u_ref = Im + np.random.normal(loc = 0 ,
+                                  scale = 0.01 * Im , 
+                                  size = np.shape(Im))
+(N,M) = np.shape(u0)
+# map the u0 u0->u0>0
+# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+u0 = u0.astype('float32')
+u_ref = u_ref.astype('float32')
+"""
+M = M-100
+u_ref2 = np.zeros([N,M],dtype='float32')
+u_ref2[:,0:M] = u_ref[:,0:M]
+u_ref = u_ref2
+del u_ref2
+
+u02 = np.zeros([N,M],dtype='float32')
+u02[:,0:M] = u0[:,0:M]
+u0 = u02
+del u02
+
+Im2 = np.zeros([N,M],dtype='float32')
+Im2[:,0:M] = Im[:,0:M]
+Im = Im2
+del Im2
+"""
+#%%
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________ROF-TV regulariser_____________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of the ROF-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm': ROF_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04,\
+        'number_of_iterations': 1200,\
+        'time_marching_parameter': 0.0025
+        }
+print ("##############ROF TV GPU##################")
+start_time = timeit.default_timer()
+rof_gpu = ROF_TV(pars['input'], 
+                     pars['regularisation_parameter'],
+                     pars['number_of_iterations'], 
+                     pars['time_marching_parameter'],'gpu')
+                     
+rms = rmse(Im, rof_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = ROF_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________FGP-TV regulariser_____________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of the FGP-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :1200 ,\
+        'tolerance_constant':1e-06,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("##############FGP TV GPU##################")
+start_time = timeit.default_timer()
+fgp_gpu = FGP_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+                                   
+rms = rmse(Im, fgp_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = FGP_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________SB-TV regulariser______________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of the SB-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :150 ,\
+        'tolerance_constant':1e-06,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("##############SB TV GPU##################")
+start_time = timeit.default_timer()
+sb_gpu = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'gpu')
+                                   
+rms = rmse(Im, sb_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = SB_TV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+#%%
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_____Total Generalised Variation (2D)______")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of TGV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+        'input' : u0,\
+        'regularisation_parameter':0.04, \
+        'alpha1':1.0,\
+        'alpha0':2.0,\
+        'number_of_iterations' :1250 ,\
+        'LipshitzConstant' :12 ,\
+        }
+        
+print ("#############TGV GPU####################")
+start_time = timeit.default_timer()  # start timing the GPU run
+tgv_gpu = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'gpu')  
+             
+             
+rms = rmse(Im, tgv_gpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("______________LLT- ROF (2D)________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of LLT-ROF regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : LLT_ROF, \
+        'input' : u0,\
+        'regularisation_parameterROF':0.04, \
+        'regularisation_parameterLLT':0.01, \
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter' :0.0025 ,\
+        }
+        
+print ("#############LLT- ROF GPU####################")
+start_time = timeit.default_timer()
+lltrof_gpu = LLT_ROF(pars['input'], 
+              pars['regularisation_parameterROF'],
+              pars['regularisation_parameterLLT'],
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'gpu')
+             
+             
+rms = rmse(Im, lltrof_gpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(lltrof_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________NDF regulariser_____________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of the NDF regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF, \
+        'input' : u0,\
+        'regularisation_parameter':0.025, \
+        'edge_parameter':0.015,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.025,\
+        'penalty_type':  1
+        }
+
+print ("##############NDF GPU##################")
+start_time = timeit.default_timer()
+ndf_gpu = NDF(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'],'gpu')  
+             
+rms = rmse(Im, ndf_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = NDF
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Anisotropic Diffusion 4th Order (2D)____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of Diff4th regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : Diff4th, \
+        'input' : u0,\
+        'regularisation_parameter':3.5, \
+        'edge_parameter':0.02,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.0015
+        }
+        
+print ("#############DIFF4th GPU################")
+start_time = timeit.default_timer()  # start timing the GPU run
+diff4_gpu = Diff4th(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'gpu')
+             
+rms = rmse(Im, diff4_gpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(diff4_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Nonlocal patches pre-calculation____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+start_time = timeit.default_timer()
+# set parameters
+pars = {'algorithm' : PatchSelect, \
+        'input' : u0,\
+        'searchwindow': 7, \
+        'patchwindow': 2,\
+        'neighbours' : 15 ,\
+        'edge_parameter':0.18}
+
+H_i, H_j, Weights = PatchSelect(pars['input'], 
+              pars['searchwindow'],
+              pars['patchwindow'], 
+              pars['neighbours'],
+              pars['edge_parameter'],'gpu')
+              
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+"""
+plt.figure()
+plt.imshow(Weights[0,:,:],cmap="gray",interpolation="nearest",vmin=0, vmax=1)
+plt.show()
+"""
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Nonlocal Total Variation penalty____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of NLTV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+pars2 = {'algorithm' : NLTV, \
+        'input' : u0,\
+        'H_i': H_i, \
+        'H_j': H_j,\
+        'H_k' : 0,\
+        'Weights' : Weights,\
+        'regularisation_parameter': 0.02,\
+        'iterations': 3
+        }
+start_time = timeit.default_timer()
+nltv_cpu = NLTV(pars2['input'], 
+              pars2['H_i'],
+              pars2['H_j'], 
+              pars2['H_k'],
+              pars2['Weights'],
+              pars2['regularisation_parameter'],
+              pars2['iterations'])
+
+rms = rmse(Im, nltv_cpu)
+# summarise the NLTV settings (pars2) without the bulky H_i/H_j/Weights arrays
+report = {k: v for k, v in pars2.items() if k not in ('H_i', 'H_j', 'Weights')}
+txtstr = printParametersToString(dict(report, rmse=rms))
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(nltv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________FGP-dTV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of the FGP-dTV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV, \
+        'input' : u0,\
+        'refdata' : u_ref,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :2000 ,\
+        'tolerance_constant':1e-06,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("##############FGP dTV GPU##################")
+start_time = timeit.default_timer()
+fgp_dtv_gpu = FGP_dTV(pars['input'], 
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+                                   
+rms = rmse(Im, fgp_dtv_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = FGP_dTV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dtv_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
diff --git a/demos/demo_gpu_regularisers3D.py b/demos/demo_gpu_regularisers3D.py
new file mode 100644
index 0000000..2f49cb9
--- /dev/null
+++ b/demos/demo_gpu_regularisers3D.py
@@ -0,0 +1,460 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Feb 22 11:39:43 2018
+
+Demonstration of GPU regularisers
+
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import timeit
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
+from qualitymetrics import rmse
+###############################################################################
+def printParametersToString(pars):
+    """Return one newline-terminated 'key = value' line per entry of dict *pars*."""
+    lines = []
+    for key, value in pars.items():
+        if key == 'algorithm':
+            shown = value.__name__  # callable: report its name only
+        elif key in ('input', 'refdata'):
+            shown = np.shape(value)  # arrays: report the shape, not the data
+        else:
+            shown = value
+        lines.append("{0} = {1}\n".format(key, shown))
+    # every entry, including the last one, ends with a newline
+    return ''.join(lines)
+###############################################################################
+#%%
+filename = os.path.join( "data" ,"lena_gray_512.tif")
+
+# read image
+Im = plt.imread(filename)                     
+Im = np.asarray(Im, dtype='float32')
+
+Im = Im/255
+perc = 0.05
+u0 = Im + np.random.normal(loc = 0 ,
+                                  scale = perc * Im , 
+                                  size = np.shape(Im))
+u_ref = Im + np.random.normal(loc = 0 ,
+                                  scale = 0.01 * Im , 
+                                  size = np.shape(Im))
+(N,M) = np.shape(u0)
+# map the u0 u0->u0>0
+# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+u0 = u0.astype('float32')
+u_ref = u_ref.astype('float32')
+"""
+M = M-100
+u_ref2 = np.zeros([N,M],dtype='float32')
+u_ref2[:,0:M] = u_ref[:,0:M]
+u_ref = u_ref2
+del u_ref2
+
+u02 = np.zeros([N,M],dtype='float32')
+u02[:,0:M] = u0[:,0:M]
+u0 = u02
+del u02
+
+Im2 = np.zeros([N,M],dtype='float32')
+Im2[:,0:M] = Im[:,0:M]
+Im = Im2
+del Im2
+"""
+
+
+slices = 20
+
+filename = os.path.join( "data" ,"lena_gray_512.tif")
+Im = plt.imread(filename)
+Im = np.asarray(Im, dtype='float32')
+
+Im = Im/255
+perc = 0.05
+
+noisyVol = np.zeros((slices,N,M),dtype='float32')  # (slices, rows, cols): each slice holds an (N,M) image
+noisyRef = np.zeros((slices,N,M),dtype='float32')  # reference volume, same shape as noisyVol
+idealVol = np.zeros((slices,N,M),dtype='float32')  # noise-free volume passed to rmse
+
+for i in range (slices):
+    noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
+    noisyRef[i,:,:] = Im + np.random.normal(loc = 0 , scale = 0.01 * Im , size = np.shape(Im))
+    idealVol[i,:,:] = Im
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________ROF-TV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of ROF-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy 11th slice of a volume')  # index 10 (0-based) shown below is the 11th slice
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm': ROF_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04,\
+        'number_of_iterations': 500,\
+        'time_marching_parameter': 0.0025        
+        }
+print ("#############ROF TV GPU####################")
+start_time = timeit.default_timer()
+rof_gpu3D = ROF_TV(pars['input'],
+             pars['regularisation_parameter'],
+             pars['number_of_iterations'],
+             pars['time_marching_parameter'],'gpu')
+rms = rmse(idealVol, rof_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(rof_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using ROF-TV'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________FGP-TV (3D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of FGP-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :300 ,\
+        'tolerance_constant':0.00001,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("#############FGP TV GPU####################")
+start_time = timeit.default_timer()
+fgp_gpu3D = FGP_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+
+rms = rmse(idealVol, fgp_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using FGP-TV'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________SB-TV (3D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of SB-TV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : SB_TV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :100 ,\
+        'tolerance_constant':1e-05,\
+        'methodTV': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("#############SB TV GPU####################")
+start_time = timeit.default_timer()
+sb_gpu3D = SB_TV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['methodTV'],
+              pars['printingOut'],'gpu')
+
+rms = rmse(idealVol, sb_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(sb_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using SB-TV'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________LLT-ROF (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of LLT-ROF regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : LLT_ROF, \
+        'input' : noisyVol,\
+        'regularisation_parameterROF':0.04, \
+        'regularisation_parameterLLT':0.015, \
+        'number_of_iterations' :300 ,\
+        'time_marching_parameter' :0.0025 ,\
+        }
+
+print ("#############LLT ROF GPU####################")  # banner names the device requested in the call below
+start_time = timeit.default_timer()
+lltrof_gpu3D = LLT_ROF(pars['input'], 
+              pars['regularisation_parameterROF'],
+              pars['regularisation_parameterLLT'],
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'gpu')
+
+rms = rmse(idealVol, lltrof_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(lltrof_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using LLT-ROF'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________TGV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of TGV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : TGV, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.04, \
+        'alpha1':1.0,\
+        'alpha0':2.0,\
+        'number_of_iterations' :600 ,\
+        'LipshitzConstant' :12 ,\
+        }
+
+print ("#############TGV GPU####################")
+start_time = timeit.default_timer()
+tgv_gpu3D = TGV(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['alpha1'],
+              pars['alpha0'],
+              pars['number_of_iterations'],
+              pars['LipshitzConstant'],'gpu')
+             
+
+rms = rmse(idealVol, tgv_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(tgv_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using TGV'))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________NDF-TV (3D)_________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of NDF regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : NDF, \
+        'input' : noisyVol,\
+        'regularisation_parameter':0.025, \
+        'edge_parameter':0.015,\
+        'number_of_iterations' :500 ,\
+        'time_marching_parameter':0.025,\
+        'penalty_type':  1
+        }
+
+print ("#############NDF GPU####################")
+start_time = timeit.default_timer()
+ndf_gpu3D = NDF(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'], 
+              pars['penalty_type'],'gpu')
+
+rms = rmse(idealVol, ndf_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(ndf_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using NDF'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("___Anisotropic Diffusion 4th Order (3D)____")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of DIFF4th regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : Diff4th, \
+        'input' : noisyVol,\
+        'regularisation_parameter':3.5, \
+        'edge_parameter':0.02,\
+        'number_of_iterations' :300 ,\
+        'time_marching_parameter':0.0015
+        }
+        
+print ("#############DIFF4th GPU################")  # banner names the device requested in the call below
+start_time = timeit.default_timer()
+diff4_gpu3D = Diff4th(pars['input'], 
+              pars['regularisation_parameter'],
+              pars['edge_parameter'], 
+              pars['number_of_iterations'],
+              pars['time_marching_parameter'],'gpu')
+             
+rms = rmse(idealVol, diff4_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(diff4_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________FGP-dTV (3D)________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure()
+plt.suptitle('Performance of FGP-dTV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV, \
+        'input' : noisyVol,\
+        'refdata' : noisyRef,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :300 ,\
+        'tolerance_constant':0.00001,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("#############FGP dTV GPU###################")  # banner names the algorithm actually run (FGP_dTV)
+start_time = timeit.default_timer()
+fgp_dTV_gpu3D = FGP_dTV(pars['input'],
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'],
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+
+rms = rmse(idealVol, fgp_dTV_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dTV_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using FGP-dTV'))
+#%%
diff --git a/demos/images/TV_vs_NLTV.jpg b/demos/images/TV_vs_NLTV.jpg
new file mode 100644
index 0000000..e976512
Binary files /dev/null and b/demos/images/TV_vs_NLTV.jpg differ
diff --git a/demos/images/probl.pdf b/demos/images/probl.pdf
new file mode 100644
index 0000000..6a06021
Binary files /dev/null and b/demos/images/probl.pdf differ
diff --git a/demos/images/probl.png b/demos/images/probl.png
new file mode 100644
index 0000000..af0e852
Binary files /dev/null and b/demos/images/probl.png differ
diff --git a/demos/images/reg_penalties.jpg b/demos/images/reg_penalties.jpg
new file mode 100644
index 0000000..923d5c4
Binary files /dev/null and b/demos/images/reg_penalties.jpg differ
diff --git a/demos/qualitymetrics.py b/demos/qualitymetrics.py
new file mode 100644
index 0000000..850829e
--- /dev/null
+++ b/demos/qualitymetrics.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Feb 21 13:34:32 2018
+# quality metrics
+@authors: Daniil Kazantsev, Edoardo Pasca
+"""
+import numpy as np
+
+def nrmse(im1, im2):
+    """Return 1 - RMSE(im1, im2) / (joint max - joint min) of the two arrays."""
+    err = np.sqrt(np.sum((im2 - im1) ** 2) / float(im1.size))
+    span = max(np.max(im1), np.max(im2)) - min(np.min(im1), np.min(im2))
+    return 1 - (err / span)
+    
+def rmse(im1, im2):
+    """Return the root-mean-square error between arrays im1 and im2."""
+    return np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size))
diff --git a/docs/data/SinoInpaint.mat b/docs/data/SinoInpaint.mat
deleted file mode 100644
index d748fb4..0000000
Binary files a/docs/data/SinoInpaint.mat and /dev/null differ
diff --git a/docs/data/lena_gray_512.tif b/docs/data/lena_gray_512.tif
deleted file mode 100644
index f80cafc..0000000
Binary files a/docs/data/lena_gray_512.tif and /dev/null differ
diff --git a/docs/demos/demoMatlab_3Ddenoise.m b/docs/demos/demoMatlab_3Ddenoise.m
deleted file mode 100644
index 0c331a4..0000000
--- a/docs/demos/demoMatlab_3Ddenoise.m
+++ /dev/null
@@ -1,178 +0,0 @@
-% Volume (3D) denoising demo using CCPi-RGL
-clear; close all
-Path1 = sprintf(['..' filesep 'mex_compile' filesep 'installed'], 1i);
-Path2 = sprintf(['..' filesep '..' filesep '..' filesep 'data' filesep], 1i);
-Path3 = sprintf(['..' filesep 'supp'], 1i);
-addpath(Path1);
-addpath(Path2);
-addpath(Path3);
-
-N = 512; 
-slices = 7;
-vol3D = zeros(N,N,slices, 'single');
-Ideal3D = zeros(N,N,slices, 'single');
-Im = double(imread('lena_gray_512.tif'))/255;  % loading image
-for i = 1:slices
-vol3D(:,:,i) = Im + .05*randn(size(Im)); 
-Ideal3D(:,:,i) = Im;
-end
-vol3D(vol3D < 0) = 0;
-figure; imshow(vol3D(:,:,15), [0 1]); title('Noisy image');
-
-
-lambda_reg = 0.03; % regularsation parameter for all methods
-%%
-fprintf('Denoise a volume using the ROF-TV model (CPU) \n');
-tau_rof = 0.0025; % time-marching constant 
-iter_rof = 300; % number of ROF iterations
-tic; u_rof = ROF_TV(single(vol3D), lambda_reg, iter_rof, tau_rof); toc; 
-energyfunc_val_rof = TV_energy(single(u_rof),single(vol3D),lambda_reg, 1);  % get energy function value
-rmse_rof = (RMSE(Ideal3D(:),u_rof(:)));
-fprintf('%s %f \n', 'RMSE error for ROF is:', rmse_rof);
-figure; imshow(u_rof(:,:,7), [0 1]); title('ROF-TV denoised volume (CPU)');
-%%
-% fprintf('Denoise a volume using the ROF-TV model (GPU) \n');
-% tau_rof = 0.0025; % time-marching constant 
-% iter_rof = 300; % number of ROF iterations
-% tic; u_rofG = ROF_TV_GPU(single(vol3D), lambda_reg, iter_rof, tau_rof); toc;
-% rmse_rofG = (RMSE(Ideal3D(:),u_rofG(:)));
-% fprintf('%s %f \n', 'RMSE error for ROF is:', rmse_rofG);
-% figure; imshow(u_rofG(:,:,7), [0 1]); title('ROF-TV denoised volume (GPU)');
-%%
-fprintf('Denoise a volume using the FGP-TV model (CPU) \n');
-iter_fgp = 300; % number of FGP iterations
-epsil_tol =  1.0e-05; % tolerance
-tic; u_fgp = FGP_TV(single(vol3D), lambda_reg, iter_fgp, epsil_tol); toc; 
-energyfunc_val_fgp = TV_energy(single(u_fgp),single(vol3D),lambda_reg, 1); % get energy function value
-rmse_fgp = (RMSE(Ideal3D(:),u_fgp(:)));
-fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmse_fgp);
-figure; imshow(u_fgp(:,:,7), [0 1]); title('FGP-TV denoised volume (CPU)');
-%%
-% fprintf('Denoise a volume using the FGP-TV model (GPU) \n');
-% iter_fgp = 300; % number of FGP iterations
-% epsil_tol =  1.0e-05; % tolerance
-% tic; u_fgpG = FGP_TV_GPU(single(vol3D), lambda_reg, iter_fgp, epsil_tol); toc; 
-% rmse_fgpG = (RMSE(Ideal3D(:),u_fgpG(:)));
-% fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmse_fgpG);
-% figure; imshow(u_fgpG(:,:,7), [0 1]); title('FGP-TV denoised volume (GPU)');
-%%
-fprintf('Denoise a volume using the SB-TV model (CPU) \n');
-iter_sb = 150; % number of SB iterations
-epsil_tol =  1.0e-05; % tolerance
-tic; u_sb = SB_TV(single(vol3D), lambda_reg, iter_sb, epsil_tol); toc; 
-energyfunc_val_sb = TV_energy(single(u_sb),single(vol3D),lambda_reg, 1);  % get energy function value
-rmse_sb = (RMSE(Ideal3D(:),u_sb(:)));
-fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmse_sb);
-figure; imshow(u_sb(:,:,7), [0 1]); title('SB-TV denoised volume (CPU)');
-%%
-% fprintf('Denoise a volume using the SB-TV model (GPU) \n');
-% iter_sb = 150; % number of SB iterations
-% epsil_tol =  1.0e-05; % tolerance
-% tic; u_sbG = SB_TV_GPU(single(vol3D), lambda_reg, iter_sb, epsil_tol); toc; 
-% rmse_sbG = (RMSE(Ideal3D(:),u_sbG(:)));
-% fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmse_sbG);
-% figure; imshow(u_sbG(:,:,7), [0 1]); title('SB-TV denoised volume (GPU)');
-%%
-fprintf('Denoise a volume using the ROF-LLT model (CPU) \n');
-lambda_ROF = lambda_reg; % ROF regularisation parameter
-lambda_LLT = lambda_reg*0.35; % LLT regularisation parameter
-iter_LLT = 300; % iterations 
-tau_rof_llt = 0.0025; % time-marching constant 
-tic; u_rof_llt = LLT_ROF(single(vol3D), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
-rmse_rof_llt = (RMSE(Ideal3D(:),u_rof_llt(:)));
-fprintf('%s %f \n', 'RMSE error for ROF-LLT is:', rmse_rof_llt);
-figure; imshow(u_rof_llt(:,:,7), [0 1]); title('ROF-LLT denoised volume (CPU)');
-%%
-% fprintf('Denoise a volume using the ROF-LLT model (GPU) \n');
-% lambda_ROF = lambda_reg; % ROF regularisation parameter
-% lambda_LLT = lambda_reg*0.35; % LLT regularisation parameter
-% iter_LLT = 300; % iterations 
-% tau_rof_llt = 0.0025; % time-marching constant 
-% tic; u_rof_llt_g = LLT_ROF_GPU(single(vol3D), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
-% rmse_rof_llt = (RMSE(Ideal3D(:),u_rof_llt_g(:)));
-% fprintf('%s %f \n', 'RMSE error for ROF-LLT is:', rmse_rof_llt);
-% figure; imshow(u_rof_llt_g(:,:,7), [0 1]); title('ROF-LLT denoised volume (GPU)');
-%%
-fprintf('Denoise a volume using Nonlinear-Diffusion model (CPU) \n');
-iter_diff = 300; % number of diffusion iterations
-lambda_regDiff = 0.025; % regularisation for the diffusivity 
-sigmaPar = 0.015; % edge-preserving parameter
-tau_param = 0.025; % time-marching constant 
-tic; u_diff = NonlDiff(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
-rmse_diff = (RMSE(Ideal3D(:),u_diff(:)));
-fprintf('%s %f \n', 'RMSE error for Diffusion is:', rmse_diff);
-figure; imshow(u_diff(:,:,7), [0 1]); title('Diffusion denoised volume (CPU)');
-%%
-% fprintf('Denoise a volume using Nonlinear-Diffusion model (GPU) \n');
-% iter_diff = 300; % number of diffusion iterations
-% lambda_regDiff = 0.025; % regularisation for the diffusivity 
-% sigmaPar = 0.015; % edge-preserving parameter
-% tau_param = 0.025; % time-marching constant 
-% tic; u_diff_g = NonlDiff_GPU(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
-% rmse_diff = (RMSE(Ideal3D(:),u_diff_g(:)));
-% fprintf('%s %f \n', 'RMSE error for Diffusion is:', rmse_diff);
-% figure; imshow(u_diff_g(:,:,7), [0 1]); title('Diffusion denoised volume (GPU)');
-%%
-fprintf('Denoise using Fourth-order anisotropic diffusion model (CPU) \n');
-iter_diff = 300; % number of diffusion iterations
-lambda_regDiff = 3.5; % regularisation for the diffusivity 
-sigmaPar = 0.02; % edge-preserving parameter
-tau_param = 0.0015; % time-marching constant 
-tic; u_diff4 = Diffusion_4thO(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
-rmse_diff4 = (RMSE(Ideal3D(:),u_diff4(:)));
-fprintf('%s %f \n', 'RMSE error for Anis.Diff of 4th order is:', rmse_diff4);
-figure; imshow(u_diff4(:,:,7), [0 1]); title('Diffusion 4thO denoised volume (CPU)');
-%%
-% fprintf('Denoise using Fourth-order anisotropic diffusion model (GPU) \n');
-% iter_diff = 300; % number of diffusion iterations
-% lambda_regDiff = 3.5; % regularisation for the diffusivity 
-% sigmaPar = 0.02; % edge-preserving parameter
-% tau_param = 0.0015; % time-marching constant 
-% tic; u_diff4_g = Diffusion_4thO_GPU(single(vol3D), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
-% rmse_diff4 = (RMSE(Ideal3D(:),u_diff4_g(:)));
-% fprintf('%s %f \n', 'RMSE error for Anis.Diff of 4th order is:', rmse_diff4);
-% figure; imshow(u_diff4_g(:,:,7), [0 1]); title('Diffusion 4thO denoised volume (GPU)');
-%%
-fprintf('Denoise using the TGV model (CPU) \n');
-lambda_TGV = 0.03; % regularisation parameter
-alpha1 = 1.0; % parameter to control the first-order term
-alpha0 = 2.0; % parameter to control the second-order term
-iter_TGV = 500; % number of Primal-Dual iterations for TGV
-tic; u_tgv = TGV(single(vol3D), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
-rmseTGV = RMSE(Ideal3D(:),u_tgv(:));
-fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV);
-figure; imshow(u_tgv(:,:,3), [0 1]); title('TGV denoised volume (CPU)');
-%%
-%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< %
-fprintf('Denoise a volume using the FGP-dTV model (CPU) \n');
-
-% create another volume (reference) with slightly less amount of noise
-vol3D_ref = zeros(N,N,slices, 'single');
-for i = 1:slices
-vol3D_ref(:,:,i) = Im + .01*randn(size(Im)); 
-end
-vol3D_ref(vol3D_ref < 0) = 0;
-% vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
-
-iter_fgp = 300; % number of FGP iterations
-epsil_tol =  1.0e-05; % tolerance
-eta =  0.2; % Reference image gradient smoothing constant
-tic; u_fgp_dtv = FGP_dTV(single(vol3D), single(vol3D_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
-figure; imshow(u_fgp_dtv(:,:,7), [0 1]); title('FGP-dTV denoised volume (CPU)');
-%%
-fprintf('Denoise a volume using the FGP-dTV model (GPU) \n');
-
-% create another volume (reference) with slightly less amount of noise
-vol3D_ref = zeros(N,N,slices, 'single');
-for i = 1:slices
-vol3D_ref(:,:,i) = Im + .01*randn(size(Im)); 
-end
-vol3D_ref(vol3D_ref < 0) = 0;
-% vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
-
-iter_fgp = 300; % number of FGP iterations
-epsil_tol =  1.0e-05; % tolerance
-eta =  0.2; % Reference image gradient smoothing constant
-tic; u_fgp_dtv_g = FGP_dTV_GPU(single(vol3D), single(vol3D_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
-figure; imshow(u_fgp_dtv_g(:,:,7), [0 1]); title('FGP-dTV denoised volume (GPU)');
-%%
diff --git a/docs/demos/demoMatlab_denoise.m b/docs/demos/demoMatlab_denoise.m
deleted file mode 100644
index 14d3096..0000000
--- a/docs/demos/demoMatlab_denoise.m
+++ /dev/null
@@ -1,189 +0,0 @@
-% Image (2D) denoising demo using CCPi-RGL
-clear; close all
-fsep = '/';
-
-Path1 = sprintf(['..' fsep 'mex_compile' fsep 'installed'], 1i);
-Path2 = sprintf(['..' fsep '..' fsep '..' fsep 'data' fsep], 1i);
-Path3 = sprintf(['..' fsep 'supp'], 1i);
-addpath(Path1); addpath(Path2); addpath(Path3);
-
-Im = double(imread('lena_gray_512.tif'))/255;  % loading image
-u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
-figure; imshow(u0, [0 1]); title('Noisy image');
-
-lambda_reg = 0.03; % regularsation parameter for all methods
-%%
-fprintf('Denoise using the ROF-TV model (CPU) \n');
-tau_rof = 0.0025; % time-marching constant 
-iter_rof = 750; % number of ROF iterations
-tic; u_rof = ROF_TV(single(u0), lambda_reg, iter_rof, tau_rof); toc; 
-energyfunc_val_rof = TV_energy(single(u_rof),single(u0),lambda_reg, 1);  % get energy function value
-rmseROF = (RMSE(u_rof(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for ROF-TV is:', rmseROF);
-figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)');
-%%
-% fprintf('Denoise using the ROF-TV model (GPU) \n');
-% tau_rof = 0.0025; % time-marching constant 
-% iter_rof = 750; % number of ROF iterations
-% tic; u_rofG = ROF_TV_GPU(single(u0), lambda_reg, iter_rof, tau_rof); toc;
-% figure; imshow(u_rofG, [0 1]); title('ROF-TV denoised image (GPU)');
-%%
-fprintf('Denoise using the FGP-TV model (CPU) \n');
-iter_fgp = 1000; % number of FGP iterations
-epsil_tol =  1.0e-06; % tolerance
-tic; u_fgp = FGP_TV(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; 
-energyfunc_val_fgp = TV_energy(single(u_fgp),single(u0),lambda_reg, 1); % get energy function value
-rmseFGP = (RMSE(u_fgp(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for FGP-TV is:', rmseFGP);
-figure; imshow(u_fgp, [0 1]); title('FGP-TV denoised image (CPU)');
-
-%%
-% fprintf('Denoise using the FGP-TV model (GPU) \n');
-% iter_fgp = 1000; % number of FGP iterations
-% epsil_tol =  1.0e-05; % tolerance
-% tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; 
-% figure; imshow(u_fgpG, [0 1]); title('FGP-TV denoised image (GPU)');
-%%
-fprintf('Denoise using the SB-TV model (CPU) \n');
-iter_sb = 150; % number of SB iterations
-epsil_tol =  1.0e-06; % tolerance
-tic; u_sb = SB_TV(single(u0), lambda_reg, iter_sb, epsil_tol); toc; 
-energyfunc_val_sb = TV_energy(single(u_sb),single(u0),lambda_reg, 1);  % get energy function value
-rmseSB = (RMSE(u_sb(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for SB-TV is:', rmseSB);
-figure; imshow(u_sb, [0 1]); title('SB-TV denoised image (CPU)');
-%%
-% fprintf('Denoise using the SB-TV model (GPU) \n');
-% iter_sb = 150; % number of SB iterations
-% epsil_tol =  1.0e-06; % tolerance
-% tic; u_sbG = SB_TV_GPU(single(u0), lambda_reg, iter_sb, epsil_tol); toc; 
-% figure; imshow(u_sbG, [0 1]); title('SB-TV denoised image (GPU)');
-%%
-fprintf('Denoise using the TGV model (CPU) \n');
-lambda_TGV = 0.045; % regularisation parameter
-alpha1 = 1.0; % parameter to control the first-order term
-alpha0 = 2.0; % parameter to control the second-order term
-iter_TGV = 2000; % number of Primal-Dual iterations for TGV
-tic; u_tgv = TGV(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
-rmseTGV = (RMSE(u_tgv(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV);
-figure; imshow(u_tgv, [0 1]); title('TGV denoised image (CPU)');
-%%
-% fprintf('Denoise using the TGV model (GPU) \n');
-% lambda_TGV = 0.045; % regularisation parameter
-% alpha1 = 1.0; % parameter to control the first-order term
-% alpha0 = 2.0; % parameter to control the second-order term
-% iter_TGV = 2000; % number of Primal-Dual iterations for TGV
-% tic; u_tgv_gpu = TGV_GPU(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; 
-% rmseTGV_gpu = (RMSE(u_tgv_gpu(:),Im(:)));
-% fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV_gpu);
-% figure; imshow(u_tgv_gpu, [0 1]); title('TGV denoised image (GPU)');
-%%
-fprintf('Denoise using the ROF-LLT model (CPU) \n');
-lambda_ROF = lambda_reg; % ROF regularisation parameter
-lambda_LLT = lambda_reg*0.45; % LLT regularisation parameter
-iter_LLT = 1; % iterations 
-tau_rof_llt = 0.0025; % time-marching constant 
-tic; u_rof_llt = LLT_ROF(single(u0), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
-rmseROFLLT = (RMSE(u_rof_llt(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for TGV is:', rmseROFLLT);
-figure; imshow(u_rof_llt, [0 1]); title('ROF-LLT denoised image (CPU)');
-%%
-% fprintf('Denoise using the ROF-LLT model (GPU) \n');
-% lambda_ROF = lambda_reg; % ROF regularisation parameter
-% lambda_LLT = lambda_reg*0.45; % LLT regularisation parameter
-% iter_LLT = 500; % iterations 
-% tau_rof_llt = 0.0025; % time-marching constant 
-% tic; u_rof_llt_g = LLT_ROF_GPU(single(u0), lambda_ROF, lambda_LLT, iter_LLT, tau_rof_llt); toc; 
-% rmseROFLLT_g = (RMSE(u_rof_llt_g(:),Im(:)));
-% fprintf('%s %f \n', 'RMSE error for TGV is:', rmseROFLLT_g);
-% figure; imshow(u_rof_llt_g, [0 1]); title('ROF-LLT denoised image (GPU)');
-%%
-fprintf('Denoise using Nonlinear-Diffusion model (CPU) \n');
-iter_diff = 800; % number of diffusion iterations
-lambda_regDiff = 0.025; % regularisation for the diffusivity 
-sigmaPar = 0.015; % edge-preserving parameter
-tau_param = 0.025; % time-marching constant 
-tic; u_diff = NonlDiff(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
-rmseDiffus = (RMSE(u_diff(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for Nonlinear Diffusion is:', rmseDiffus);
-figure; imshow(u_diff, [0 1]); title('Diffusion denoised image (CPU)');
-%%
-% fprintf('Denoise using Nonlinear-Diffusion model (GPU) \n');
-% iter_diff = 800; % number of diffusion iterations
-% lambda_regDiff = 0.025; % regularisation for the diffusivity 
-% sigmaPar = 0.015; % edge-preserving parameter
-% tau_param = 0.025; % time-marching constant 
-% tic; u_diff_g = NonlDiff_GPU(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
-% figure; imshow(u_diff_g, [0 1]); title('Diffusion denoised image (GPU)');
-%%
-fprintf('Denoise using Fourth-order anisotropic diffusion model (CPU) \n');
-iter_diff = 800; % number of diffusion iterations
-lambda_regDiff = 3.5; % regularisation for the diffusivity 
-sigmaPar = 0.02; % edge-preserving parameter
-tau_param = 0.0015; % time-marching constant 
-tic; u_diff4 = Diffusion_4thO(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
-rmseDiffHO = (RMSE(u_diff4(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for Fourth-order anisotropic diffusion is:', rmseDiffHO);
-figure; imshow(u_diff4, [0 1]); title('Diffusion 4thO denoised image (CPU)');
-%%
-% fprintf('Denoise using Fourth-order anisotropic diffusion model (GPU) \n');
-% iter_diff = 800; % number of diffusion iterations
-% lambda_regDiff = 3.5; % regularisation for the diffusivity 
-% sigmaPar = 0.02; % edge-preserving parameter
-% tau_param = 0.0015; % time-marching constant 
-% tic; u_diff4_g = Diffusion_4thO_GPU(single(u0), lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
-% figure; imshow(u_diff4_g, [0 1]); title('Diffusion 4thO denoised image (GPU)');
-%%
-fprintf('Weights pre-calculation for Non-local TV (takes time on CPU) \n');
-SearchingWindow = 7;
-PatchWindow = 2;
-NeighboursNumber = 20; % the number of neibours to include
-h = 0.23; % edge related parameter for NLM
-tic; [H_i, H_j, Weights] = PatchSelect(single(u0), SearchingWindow, PatchWindow, NeighboursNumber, h); toc;
-%%
-fprintf('Denoise using Non-local Total Variation (CPU) \n');
-iter_nltv = 3; % number of nltv iterations
-lambda_nltv = 0.05; % regularisation parameter for nltv
-tic; u_nltv = Nonlocal_TV(single(u0), H_i, H_j, 0, Weights, lambda_nltv, iter_nltv); toc; 
-rmse_nltv = (RMSE(u_nltv(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for Non-local Total Variation is:', rmse_nltv);
-figure; imagesc(u_nltv, [0 1]); colormap(gray); daspect([1 1 1]); title('Non-local Total Variation denoised image (CPU)');
-%%
-%>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< %
-
-fprintf('Denoise using the FGP-dTV model (CPU) \n');
-% create another image (reference) with slightly less amount of noise
-u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0;
-% u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
-
-iter_fgp = 1000; % number of FGP iterations
-epsil_tol =  1.0e-06; % tolerance
-eta =  0.2; % Reference image gradient smoothing constant
-tic; u_fgp_dtv = FGP_dTV(single(u0), single(u_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
-rmse_dTV= (RMSE(u_fgp_dtv(:),Im(:)));
-fprintf('%s %f \n', 'RMSE error for Directional Total Variation (dTV) is:', rmse_dTV);
-figure; imshow(u_fgp_dtv, [0 1]); title('FGP-dTV denoised image (CPU)');
-%%
-% fprintf('Denoise using the FGP-dTV model (GPU) \n');
-% % create another image (reference) with slightly less amount of noise
-% u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0;
-% % u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
-% 
-% iter_fgp = 1000; % number of FGP iterations
-% epsil_tol =  1.0e-06; % tolerance
-% eta =  0.2; % Reference image gradient smoothing constant
-% tic; u_fgp_dtvG = FGP_dTV_GPU(single(u0), single(u_ref), lambda_reg, iter_fgp, epsil_tol, eta); toc; 
-% figure; imshow(u_fgp_dtvG, [0 1]); title('FGP-dTV denoised image (GPU)');
-%%
-fprintf('Denoise using the TNV prior (CPU) \n');
-slices = 5; N = 512;
-vol3D = zeros(N,N,slices, 'single');
-for i = 1:slices
-vol3D(:,:,i) = Im + .05*randn(size(Im)); 
-end
-vol3D(vol3D < 0) = 0;
-
-iter_tnv = 200; % number of TNV iterations
-tic; u_tnv = TNV(single(vol3D), lambda_reg, iter_tnv); toc; 
-figure; imshow(u_tnv(:,:,3), [0 1]); title('TNV denoised stack of channels (CPU)');
diff --git a/docs/demos/demoMatlab_inpaint.m b/docs/demos/demoMatlab_inpaint.m
deleted file mode 100644
index 66f9c15..0000000
--- a/docs/demos/demoMatlab_inpaint.m
+++ /dev/null
@@ -1,35 +0,0 @@
-% Image (2D) inpainting demo using CCPi-RGL
-clear; close all
-Path1 = sprintf(['..' filesep 'mex_compile' filesep 'installed'], 1i);
-Path2 = sprintf(['..' filesep '..' filesep '..' filesep 'data' filesep], 1i);
-addpath(Path1);
-addpath(Path2);
-
-load('SinoInpaint.mat');
-Sinogram = Sinogram./max(Sinogram(:));
-Sino_mask = Sinogram.*(1-single(Mask));
-figure; 
-subplot(1,2,1); imshow(Sino_mask, [0 1]); title('Missing data sinogram');
-subplot(1,2,2); imshow(Mask, [0 1]); title('Mask');
-%%
-fprintf('Inpaint using Linear-Diffusion model (CPU) \n');
-iter_diff = 5000; % number of diffusion iterations
-lambda_regDiff = 6000; % regularisation for the diffusivity 
-sigmaPar = 0.0; % edge-preserving parameter
-tau_param = 0.000075; % time-marching constant 
-tic; u_diff = NonlDiff_Inp(single(Sino_mask), Mask, lambda_regDiff, sigmaPar, iter_diff, tau_param); toc; 
-figure; imshow(u_diff, [0 1]); title('Linear-Diffusion inpainted sinogram (CPU)');
-%%
-fprintf('Inpaint using Nonlinear-Diffusion model (CPU) \n');
-iter_diff = 1500; % number of diffusion iterations
-lambda_regDiff = 80; % regularisation for the diffusivity 
-sigmaPar = 0.00009; % edge-preserving parameter
-tau_param = 0.000008; % time-marching constant 
-tic; u_diff = NonlDiff_Inp(single(Sino_mask), Mask, lambda_regDiff, sigmaPar, iter_diff, tau_param, 'Huber'); toc; 
-figure; imshow(u_diff, [0 1]); title('Non-Linear Diffusion inpainted sinogram (CPU)');
-%%
-fprintf('Inpaint using Nonlocal Vertical Marching model (CPU) \n');
-Increment = 1; % linear increment for the searching window
-tic; [u_nom,maskupd] = NonlocalMarching_Inpaint(single(Sino_mask), Mask, Increment); toc;
-figure; imshow(u_nom, [0 1]); title('NVM inpainted sinogram (CPU)');
-%%
\ No newline at end of file
diff --git a/docs/demos/demo_cpu_inpainters.py b/docs/demos/demo_cpu_inpainters.py
deleted file mode 100644
index 3b4191b..0000000
--- a/docs/demos/demo_cpu_inpainters.py
+++ /dev/null
@@ -1,192 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Demonstration of CPU inpainters
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import timeit
-from scipy import io
-from ccpi.filters.regularisers import NDF_INP, NVM_INP
-from qualitymetrics import rmse
-###############################################################################
-def printParametersToString(pars):
-        txt = r''
-        for key, value in pars.items():
-            if key== 'algorithm' :
-                txt += "{0} = {1}".format(key, value.__name__)
-            elif key == 'input':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            elif key == 'maskData':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            else:
-                txt += "{0} = {1}".format(key, value)
-            txt += '\n'
-        return txt
-###############################################################################
-
-# read sinogram and the mask
-filename = os.path.join(".." , ".." , ".." , "data" ,"SinoInpaint.mat")
-sino = io.loadmat(filename)
-sino_full = sino.get('Sinogram')
-Mask = sino.get('Mask')
-[angles_dim,detectors_dim] = sino_full.shape
-sino_full = sino_full/np.max(sino_full)
-#apply mask to sinogram
-sino_cut = sino_full*(1-Mask)
-#sino_cut_new = np.zeros((angles_dim,detectors_dim),'float32')
-#sino_cut_new = sino_cut.copy(order='c')
-#sino_cut_new[:] = sino_cut[:]
-sino_cut_new = np.ascontiguousarray(sino_cut, dtype=np.float32);
-#mask = np.zeros((angles_dim,detectors_dim),'uint8')
-#mask =Mask.copy(order='c')
-#mask[:] = Mask[:]
-mask = np.ascontiguousarray(Mask, dtype=np.uint8);
-
-plt.figure(1)
-plt.subplot(121)
-plt.imshow(sino_cut_new,vmin=0.0, vmax=1)
-plt.title('Missing Data sinogram')
-plt.subplot(122)
-plt.imshow(mask)
-plt.title('Mask')
-plt.show()
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Inpainting using linear diffusion (2D)__")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure(2)
-plt.suptitle('Performance of linear inpainting using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Missing data sinogram')
-imgplot = plt.imshow(sino_cut_new,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF_INP, \
-        'input' : sino_cut_new,\
-        'maskData' : mask,\
-        'regularisation_parameter':5000,\
-        'edge_parameter':0,\
-        'number_of_iterations' :5000 ,\
-        'time_marching_parameter':0.000075,\
-        'penalty_type':0
-        }
-        
-start_time = timeit.default_timer()
-ndf_inp_linear = NDF_INP(pars['input'],
-              pars['maskData'],
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'])
-             
-rms = rmse(sino_full, ndf_inp_linear)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_inp_linear, cmap="gray")
-plt.title('{}'.format('Linear diffusion inpainting results'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_Inpainting using nonlinear diffusion (2D)_")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure(3)
-plt.suptitle('Performance of nonlinear diffusion inpainting using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Missing data sinogram')
-imgplot = plt.imshow(sino_cut_new,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF_INP, \
-        'input' : sino_cut_new,\
-        'maskData' : mask,\
-        'regularisation_parameter':80,\
-        'edge_parameter':0.00009,\
-        'number_of_iterations' :1500 ,\
-        'time_marching_parameter':0.000008,\
-        'penalty_type':1
-        }
-        
-start_time = timeit.default_timer()
-ndf_inp_nonlinear = NDF_INP(pars['input'],
-              pars['maskData'],
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'])
-             
-rms = rmse(sino_full, ndf_inp_nonlinear)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_inp_nonlinear, cmap="gray")
-plt.title('{}'.format('Nonlinear diffusion inpainting results'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("Inpainting using nonlocal vertical marching")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure(4)
-plt.suptitle('Performance of NVM inpainting using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Missing data sinogram')
-imgplot = plt.imshow(sino_cut,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NVM_INP, \
-        'input' : sino_cut_new,\
-        'maskData' : mask,\
-        'SW_increment': 1,\
-        'number_of_iterations' : 150
-        }
-        
-start_time = timeit.default_timer()
-(nvm_inp, mask_upd) = NVM_INP(pars['input'],
-              pars['maskData'],
-              pars['SW_increment'],
-              pars['number_of_iterations'])
-             
-rms = rmse(sino_full, nvm_inp)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(nvm_inp, cmap="gray")
-plt.title('{}'.format('Nonlocal Vertical Marching inpainting results'))
-#%%
diff --git a/docs/demos/demo_cpu_regularisers.py b/docs/demos/demo_cpu_regularisers.py
deleted file mode 100644
index e6befa9..0000000
--- a/docs/demos/demo_cpu_regularisers.py
+++ /dev/null
@@ -1,572 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Feb 22 11:39:43 2018
-
-Demonstration of CPU regularisers 
-
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, TNV, NDF, Diff4th
-from ccpi.filters.regularisers import PatchSelect, NLTV
-from qualitymetrics import rmse
-###############################################################################
-def printParametersToString(pars):
-        txt = r''
-        for key, value in pars.items():
-            if key== 'algorithm' :
-                txt += "{0} = {1}".format(key, value.__name__)
-            elif key == 'input':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            elif key == 'refdata':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            else:
-                txt += "{0} = {1}".format(key, value)
-            txt += '\n'
-        return txt
-###############################################################################
-#%%
-filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-
-# read image
-Im = plt.imread(filename)
-Im = np.asarray(Im, dtype='float32')
-
-Im = Im/255.0
-perc = 0.05
-u0 = Im + np.random.normal(loc = 0 ,
-                                  scale = perc * Im , 
-                                  size = np.shape(Im))
-u_ref = Im + np.random.normal(loc = 0 ,
-                                  scale = 0.01 * Im , 
-                                  size = np.shape(Im))
-(N,M) = np.shape(u0)
-# map the u0 u0->u0>0
-# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-u0 = u0.astype('float32')
-u_ref = u_ref.astype('float32')
-
-# change dims to check that modules work with non-squared images
-"""
-M = M-100
-u_ref2 = np.zeros([N,M],dtype='float32')
-u_ref2[:,0:M] = u_ref[:,0:M]
-u_ref = u_ref2
-del u_ref2
-
-u02 = np.zeros([N,M],dtype='float32')
-u02[:,0:M] = u0[:,0:M]
-u0 = u02
-del u02
-
-Im2 = np.zeros([N,M],dtype='float32')
-Im2[:,0:M] = Im[:,0:M]
-Im = Im2
-del Im2
-"""
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________ROF-TV (2D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of ROF-TV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm': ROF_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04,\
-        'number_of_iterations': 1200,\
-        'time_marching_parameter': 0.0025        
-        }
-print ("#############ROF TV CPU####################")
-start_time = timeit.default_timer()
-rof_cpu = ROF_TV(pars['input'],
-             pars['regularisation_parameter'],
-             pars['number_of_iterations'],
-             pars['time_marching_parameter'],'cpu')
-rms = rmse(Im, rof_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(rof_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________FGP-TV (2D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of FGP-TV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :2000 ,\
-        'tolerance_constant':1e-06,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############FGP TV CPU####################")
-start_time = timeit.default_timer()
-fgp_cpu = FGP_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'cpu')  
-             
-             
-rms = rmse(Im, fgp_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________SB-TV (2D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of SB-TV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : SB_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :150 ,\
-        'tolerance_constant':1e-06,\
-        'methodTV': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############SB TV CPU####################")
-start_time = timeit.default_timer()
-sb_cpu = SB_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['printingOut'],'cpu')  
-             
-             
-rms = rmse(Im, sb_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(sb_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-#%%
-
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_____Total Generalised Variation (2D)______")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of TGV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : TGV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'alpha1':1.0,\
-        'alpha0':2.0,\
-        'number_of_iterations' :1350 ,\
-        'LipshitzConstant' :12 ,\
-        }
-        
-print ("#############TGV CPU####################")
-start_time = timeit.default_timer()
-tgv_cpu = TGV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['alpha1'],
-              pars['alpha0'],
-              pars['number_of_iterations'],
-              pars['LipshitzConstant'],'cpu')
-             
-             
-rms = rmse(Im, tgv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tgv_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("______________LLT- ROF (2D)________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of LLT-ROF regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : LLT_ROF, \
-        'input' : u0,\
-        'regularisation_parameterROF':0.04, \
-        'regularisation_parameterLLT':0.01, \
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter' :0.0025 ,\
-        }
-        
-print ("#############LLT- ROF CPU####################")
-start_time = timeit.default_timer()
-lltrof_cpu = LLT_ROF(pars['input'], 
-              pars['regularisation_parameterROF'],
-              pars['regularisation_parameterLLT'],
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'cpu')
-
-rms = rmse(Im, lltrof_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(lltrof_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-
-
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("________________NDF (2D)___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of NDF regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF, \
-        'input' : u0,\
-        'regularisation_parameter':0.025, \
-        'edge_parameter':0.015,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.025,\
-        'penalty_type':1
-        }
-        
-print ("#############NDF CPU################")
-start_time = timeit.default_timer()
-ndf_cpu = NDF(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'],'cpu')  
-             
-rms = rmse(Im, ndf_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Anisotropic Diffusion 4th Order (2D)____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of Diff4th regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : Diff4th, \
-        'input' : u0,\
-        'regularisation_parameter':3.5, \
-        'edge_parameter':0.02,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.0015
-        }
-        
-print ("#############Diff4th CPU################")
-start_time = timeit.default_timer()
-diff4_cpu = Diff4th(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'cpu')
-             
-rms = rmse(Im, diff4_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(diff4_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Nonlocal patches pre-calculation____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-start_time = timeit.default_timer()
-# set parameters
-pars = {'algorithm' : PatchSelect, \
-        'input' : u0,\
-        'searchwindow': 7, \
-        'patchwindow': 2,\
-        'neighbours' : 15 ,\
-        'edge_parameter':0.18}
-
-H_i, H_j, Weights = PatchSelect(pars['input'], 
-              pars['searchwindow'],
-              pars['patchwindow'], 
-              pars['neighbours'],
-              pars['edge_parameter'],'cpu')
-              
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-"""
-plt.figure()
-plt.imshow(Weights[0,:,:],cmap="gray",interpolation="nearest",vmin=0, vmax=1)
-plt.show()
-"""
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Nonlocal Total Variation penalty____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of NLTV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-pars2 = {'algorithm' : NLTV, \
-        'input' : u0,\
-        'H_i': H_i, \
-        'H_j': H_j,\
-        'H_k' : 0,\
-        'Weights' : Weights,\
-        'regularisation_parameter': 0.04,\
-        'iterations': 3
-        }
-start_time = timeit.default_timer()
-nltv_cpu = NLTV(pars2['input'], 
-              pars2['H_i'],
-              pars2['H_j'], 
-              pars2['H_k'],
-              pars2['Weights'],
-              pars2['regularisation_parameter'],
-              pars2['iterations'])
-
-rms = rmse(Im, nltv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(nltv_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_____________FGP-dTV (2D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of FGP-dTV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_dTV, \
-        'input' : u0,\
-        'refdata' : u_ref,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :2000 ,\
-        'tolerance_constant':1e-06,\
-        'eta_const':0.2,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############FGP dTV CPU####################")
-start_time = timeit.default_timer()
-fgp_dtv_cpu = FGP_dTV(pars['input'], 
-              pars['refdata'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['eta_const'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'cpu')
-             
-rms = rmse(Im, fgp_dtv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_dtv_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("__________Total nuclear Variation__________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of TNV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-channelsNo = 5
-noisyVol = np.zeros((channelsNo,N,M),dtype='float32')
-idealVol = np.zeros((channelsNo,N,M),dtype='float32')
-
-for i in range (channelsNo):
-    noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
-    idealVol[i,:,:] = Im
-
-# set parameters
-pars = {'algorithm' : TNV, \
-        'input' : noisyVol,\
-        'regularisation_parameter': 0.04, \
-        'number_of_iterations' : 200 ,\
-        'tolerance_constant':1e-05
-        }
-        
-print ("#############TNV CPU#################")
-start_time = timeit.default_timer()
-tnv_cpu = TNV(pars['input'],           
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'])
-             
-rms = rmse(idealVol, tnv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tnv_cpu[3,:,:], cmap="gray")
-plt.title('{}'.format('CPU results'))
diff --git a/docs/demos/demo_cpu_regularisers3D.py b/docs/demos/demo_cpu_regularisers3D.py
deleted file mode 100644
index 2d2fc22..0000000
--- a/docs/demos/demo_cpu_regularisers3D.py
+++ /dev/null
@@ -1,458 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Feb 22 11:39:43 2018
-
-Demonstration of 3D CPU regularisers 
-
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
-from qualitymetrics import rmse
-###############################################################################
-def printParametersToString(pars):
-        txt = r''
-        for key, value in pars.items():
-            if key== 'algorithm' :
-                txt += "{0} = {1}".format(key, value.__name__)
-            elif key == 'input':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            elif key == 'refdata':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            else:
-                txt += "{0} = {1}".format(key, value)
-            txt += '\n'
-        return txt
-###############################################################################
-#%%
-filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-
-# read image
-Im = plt.imread(filename)
-Im = np.asarray(Im, dtype='float32')
-
-Im = Im/255
-perc = 0.05
-u0 = Im + np.random.normal(loc = 0 ,
-                                  scale = perc * Im , 
-                                  size = np.shape(Im))
-u_ref = Im + np.random.normal(loc = 0 ,
-                                  scale = 0.01 * Im , 
-                                  size = np.shape(Im))
-(N,M) = np.shape(u0)
-# map the u0 u0->u0>0
-# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-u0 = u0.astype('float32')
-u_ref = u_ref.astype('float32')
-
-# change dims to check that modules work with non-squared images
-"""
-M = M-100
-u_ref2 = np.zeros([N,M],dtype='float32')
-u_ref2[:,0:M] = u_ref[:,0:M]
-u_ref = u_ref2
-del u_ref2
-
-u02 = np.zeros([N,M],dtype='float32')
-u02[:,0:M] = u0[:,0:M]
-u0 = u02
-del u02
-
-Im2 = np.zeros([N,M],dtype='float32')
-Im2[:,0:M] = Im[:,0:M]
-Im = Im2
-del Im2
-"""
-slices = 15
-
-noisyVol = np.zeros((slices,N,M),dtype='float32')
-noisyRef = np.zeros((slices,N,M),dtype='float32')
-idealVol = np.zeros((slices,N,M),dtype='float32')
-
-for i in range (slices):
-    noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
-    noisyRef[i,:,:] = Im + np.random.normal(loc = 0 , scale = 0.01 * Im , size = np.shape(Im))
-    idealVol[i,:,:] = Im
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________ROF-TV (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of ROF-TV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy 15th slice of a volume')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm': ROF_TV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04,\
-        'number_of_iterations': 500,\
-        'time_marching_parameter': 0.0025
-        }
-print ("#############ROF TV CPU####################")
-start_time = timeit.default_timer()
-rof_cpu3D = ROF_TV(pars['input'],
-             pars['regularisation_parameter'],
-             pars['number_of_iterations'],
-             pars['time_marching_parameter'],'cpu')
-rms = rmse(idealVol, rof_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(rof_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using ROF-TV'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________FGP-TV (3D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of FGP-TV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_TV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :300 ,\
-        'tolerance_constant':0.00001,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############FGP TV CPU####################")
-start_time = timeit.default_timer()
-fgp_cpu3D = FGP_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'cpu')  
-             
-             
-rms = rmse(idealVol, fgp_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using FGP-TV'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________SB-TV (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of SB-TV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : SB_TV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :150 ,\
-        'tolerance_constant':0.00001,\
-        'methodTV': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############SB TV CPU####################")
-start_time = timeit.default_timer()
-sb_cpu3D = SB_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['printingOut'],'cpu')
-             
-rms = rmse(idealVol, sb_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(sb_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using SB-TV'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________LLT-ROF (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of LLT-ROF regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : LLT_ROF, \
-        'input' : noisyVol,\
-        'regularisation_parameterROF':0.04, \
-        'regularisation_parameterLLT':0.015, \
-        'number_of_iterations' :300 ,\
-        'time_marching_parameter' :0.0025 ,\
-        }
-
-print ("#############LLT ROF CPU####################")
-start_time = timeit.default_timer()
-lltrof_cpu3D = LLT_ROF(pars['input'], 
-              pars['regularisation_parameterROF'],
-              pars['regularisation_parameterLLT'],
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'cpu')
-
-rms = rmse(idealVol, lltrof_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(lltrof_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using LLT-ROF'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________TGV (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of TGV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : TGV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04, \
-        'alpha1':1.0,\
-        'alpha0':2.0,\
-        'number_of_iterations' :250 ,\
-        'LipshitzConstant' :12 ,\
-        }
-
-print ("#############TGV CPU####################")
-start_time = timeit.default_timer()
-tgv_cpu3D = TGV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['alpha1'],
-              pars['alpha0'],
-              pars['number_of_iterations'],
-              pars['LipshitzConstant'],'cpu')
-             
-
-rms = rmse(idealVol, tgv_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tgv_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using TGV'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("________________NDF (3D)___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of NDF regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy volume')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.025, \
-        'edge_parameter':0.015,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.025,\
-        'penalty_type':  1
-        }
-        
-print ("#############NDF CPU################")
-start_time = timeit.default_timer()
-ndf_cpu3D = NDF(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'])  
-             
-rms = rmse(idealVol, ndf_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using NDF iterations'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Anisotropic Diffusion 4th Order (2D)____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of Diff4th regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy volume')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : Diff4th, \
-        'input' : noisyVol,\
-        'regularisation_parameter':3.5, \
-        'edge_parameter':0.02,\
-        'number_of_iterations' :300 ,\
-        'time_marching_parameter':0.0015
-        }
-        
-print ("#############Diff4th CPU################")
-start_time = timeit.default_timer()
-diff4th_cpu3D = Diff4th(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'])  
-             
-rms = rmse(idealVol, diff4th_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(diff4th_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using DIFF4th iterations'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________FGP-dTV (3D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of FGP-dTV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_dTV,\
-        'input' : noisyVol,\
-        'refdata' : noisyRef,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :300 ,\
-        'tolerance_constant':0.00001,\
-        'eta_const':0.2,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############FGP dTV CPU####################")
-start_time = timeit.default_timer()
-fgp_dTV_cpu3D = FGP_dTV(pars['input'],
-              pars['refdata'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['eta_const'],
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'cpu')
-             
-             
-rms = rmse(idealVol, fgp_dTV_cpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_dTV_cpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the CPU using FGP-dTV'))
-#%%
diff --git a/docs/demos/demo_cpu_vs_gpu_regularisers.py b/docs/demos/demo_cpu_vs_gpu_regularisers.py
deleted file mode 100644
index 230a761..0000000
--- a/docs/demos/demo_cpu_vs_gpu_regularisers.py
+++ /dev/null
@@ -1,790 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Feb 22 11:39:43 2018
-
-Demonstration of CPU implementation against the GPU one
-
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
-from ccpi.filters.regularisers import PatchSelect
-from qualitymetrics import rmse
-###############################################################################
-def printParametersToString(pars):
-        txt = r''
-        for key, value in pars.items():
-            if key== 'algorithm' :
-                txt += "{0} = {1}".format(key, value.__name__)
-            elif key == 'input':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            elif key == 'refdata':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            else:
-                txt += "{0} = {1}".format(key, value)
-            txt += '\n'
-        return txt
-###############################################################################
-
-filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-
-# read image
-Im = plt.imread(filename)                     
-Im = np.asarray(Im, dtype='float32')
-
-Im = Im/255
-perc = 0.05
-u0 = Im + np.random.normal(loc = 0 ,
-                                  scale = perc * Im , 
-                                  size = np.shape(Im))
-u_ref = Im + np.random.normal(loc = 0 ,
-                                  scale = 0.01 * Im , 
-                                  size = np.shape(Im))
-
-# map the u0 u0->u0>0
-# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-u0 = u0.astype('float32')
-u_ref = u_ref.astype('float32')
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________ROF-TV bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of ROF-TV regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm': ROF_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04,\
-        'number_of_iterations': 4500,\
-        'time_marching_parameter': 0.00002
-        }
-print ("#############ROF TV CPU####################")
-start_time = timeit.default_timer()
-rof_cpu = ROF_TV(pars['input'],
-             pars['regularisation_parameter'],
-             pars['number_of_iterations'],
-             pars['time_marching_parameter'],'cpu')
-rms = rmse(Im, rof_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(rof_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-print ("##############ROF TV GPU##################")
-start_time = timeit.default_timer()
-rof_gpu = ROF_TV(pars['input'], 
-                     pars['regularisation_parameter'],
-                     pars['number_of_iterations'], 
-                     pars['time_marching_parameter'],'gpu')
-                     
-rms = rmse(Im, rof_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = ROF_TV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(rof_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(rof_cpu))
-diff_im = abs(rof_cpu - rof_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________FGP-TV bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of FGP-TV regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :1200 ,\
-        'tolerance_constant':0.00001,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############FGP TV CPU####################")
-start_time = timeit.default_timer()
-fgp_cpu = FGP_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'cpu')  
-             
-             
-rms = rmse(Im, fgp_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-
-print ("##############FGP TV GPU##################")
-start_time = timeit.default_timer()
-fgp_gpu = FGP_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'gpu')
-                                   
-rms = rmse(Im, fgp_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = FGP_TV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(fgp_cpu))
-diff_im = abs(fgp_cpu - fgp_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________SB-TV bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of SB-TV regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : SB_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :150 ,\
-        'tolerance_constant':1e-05,\
-        'methodTV': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############SB-TV CPU####################")
-start_time = timeit.default_timer()
-sb_cpu = SB_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['printingOut'],'cpu')  
-             
-             
-rms = rmse(Im, sb_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(sb_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-
-print ("##############SB TV GPU##################")
-start_time = timeit.default_timer()
-sb_gpu = SB_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['printingOut'],'gpu')
-                                   
-rms = rmse(Im, sb_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = SB_TV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(sb_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(sb_cpu))
-diff_im = abs(sb_cpu - sb_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________TGV bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of TGV regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : TGV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'alpha1':1.0,\
-        'alpha0':2.0,\
-        'number_of_iterations' :400 ,\
-        'LipshitzConstant' :12 ,\
-        }
-        
-print ("#############TGV CPU####################")
-start_time = timeit.default_timer()
-tgv_cpu = TGV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['alpha1'],
-              pars['alpha0'],
-              pars['number_of_iterations'],
-              pars['LipshitzConstant'],'cpu')
-             
-rms = rmse(Im, tgv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tgv_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-print ("##############TGV GPU##################")
-start_time = timeit.default_timer()
-tgv_gpu = TGV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['alpha1'],
-              pars['alpha0'],
-              pars['number_of_iterations'],
-              pars['LipshitzConstant'],'gpu')
-                                   
-rms = rmse(Im, tgv_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = TGV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tgv_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(tgv_gpu))
-diff_im = abs(tgv_cpu - tgv_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________LLT-ROF bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of LLT-ROF regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : LLT_ROF, \
-        'input' : u0,\
-        'regularisation_parameterROF':0.04, \
-        'regularisation_parameterLLT':0.01, \
-        'number_of_iterations' :4500 ,\
-        'time_marching_parameter' :0.00002 ,\
-        }
-        
-print ("#############LLT- ROF CPU####################")
-start_time = timeit.default_timer()
-lltrof_cpu = LLT_ROF(pars['input'], 
-              pars['regularisation_parameterROF'],
-              pars['regularisation_parameterLLT'],
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'cpu')
-
-rms = rmse(Im, lltrof_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(lltrof_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-print ("#############LLT- ROF GPU####################")
-start_time = timeit.default_timer()
-lltrof_gpu = LLT_ROF(pars['input'], 
-              pars['regularisation_parameterROF'],
-              pars['regularisation_parameterLLT'],
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'gpu')
-
-rms = rmse(Im, lltrof_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = LLT_ROF
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(lltrof_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(lltrof_gpu))
-diff_im = abs(lltrof_cpu - lltrof_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________NDF bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of NDF regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF, \
-        'input' : u0,\
-        'regularisation_parameter':0.06, \
-        'edge_parameter':0.04,\
-        'number_of_iterations' :1000 ,\
-        'time_marching_parameter':0.025,\
-        'penalty_type':  1
-        }
-        
-print ("#############NDF CPU####################")
-start_time = timeit.default_timer()
-ndf_cpu = NDF(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'],'cpu')
-             
-rms = rmse(Im, ndf_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-
-print ("##############NDF GPU##################")
-start_time = timeit.default_timer()
-ndf_gpu = NDF(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'],'gpu')
-             
-rms = rmse(Im, ndf_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = NDF
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(ndf_cpu))
-diff_im = abs(ndf_cpu - ndf_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Anisotropic Diffusion 4th Order (2D)____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of Diff4th regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : Diff4th, \
-        'input' : u0,\
-        'regularisation_parameter':3.5, \
-        'edge_parameter':0.02,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.001
-        }
-
-print ("#############Diff4th CPU####################")
-start_time = timeit.default_timer()
-diff4th_cpu = Diff4th(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'cpu')
-             
-rms = rmse(Im, diff4th_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(diff4th_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-print ("##############Diff4th GPU##################")
-start_time = timeit.default_timer()
-diff4th_gpu = Diff4th(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 'gpu')
-             
-rms = rmse(Im, diff4th_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = Diff4th
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(diff4th_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(diff4th_cpu))
-diff_im = abs(diff4th_cpu - diff4th_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________FGP-dTV bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of FGP-dTV regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,4,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_dTV, \
-        'input' : u0,\
-        'refdata' : u_ref,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :1000 ,\
-        'tolerance_constant':1e-07,\
-        'eta_const':0.2,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-        
-print ("#############FGP dTV CPU####################")
-start_time = timeit.default_timer()
-fgp_dtv_cpu = FGP_dTV(pars['input'], 
-              pars['refdata'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['eta_const'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'cpu')
-             
-             
-rms = rmse(Im, fgp_dtv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_dtv_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-
-print ("##############FGP dTV GPU##################")
-start_time = timeit.default_timer()
-fgp_dtv_gpu = FGP_dTV(pars['input'], 
-              pars['refdata'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['eta_const'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'gpu')
-rms = rmse(Im, fgp_dtv_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = FGP_dTV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,4,3)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_dtv_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(fgp_dtv_cpu))
-diff_im = abs(fgp_dtv_cpu - fgp_dtv_gpu)
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,4,4)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____Non-local regularisation bench_________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Comparison of Nonlocal TV regulariser using CPU and GPU implementations')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-pars = {'algorithm' : PatchSelect, \
-        'input' : u0,\
-        'searchwindow': 7, \
-        'patchwindow': 2,\
-        'neighbours' : 15 ,\
-        'edge_parameter':0.18}
-
-print ("############## Nonlocal Patches on CPU##################")
-start_time = timeit.default_timer()
-H_i, H_j, WeightsCPU = PatchSelect(pars['input'], 
-              pars['searchwindow'],
-              pars['patchwindow'], 
-              pars['neighbours'],
-              pars['edge_parameter'],'cpu')
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-
-print ("############## Nonlocal Patches on GPU##################")
-start_time = timeit.default_timer()
-start_time = timeit.default_timer()
-H_i, H_j, WeightsGPU = PatchSelect(pars['input'], 
-              pars['searchwindow'],
-              pars['patchwindow'], 
-              pars['neighbours'],
-              pars['edge_parameter'],'gpu')
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-
-print ("--------Compare the results--------")
-tolerance = 1e-05
-diff_im = np.zeros(np.shape(u0))
-diff_im = abs(WeightsCPU[0,:,:] - WeightsGPU[0,:,:])
-diff_im[diff_im > tolerance] = 1
-a=fig.add_subplot(1,2,2)
-imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
-plt.title('{}'.format('Pixels larger threshold difference'))
-if (diff_im.sum() > 1):
-    print ("Arrays do not match!")
-else:
-    print ("Arrays match")
-#%%
\ No newline at end of file
diff --git a/docs/demos/demo_gpu_regularisers.py b/docs/demos/demo_gpu_regularisers.py
deleted file mode 100644
index e1c6575..0000000
--- a/docs/demos/demo_gpu_regularisers.py
+++ /dev/null
@@ -1,518 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Feb 22 11:39:43 2018
-
-Demonstration of GPU regularisers
-
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
-from ccpi.filters.regularisers import PatchSelect, NLTV
-from qualitymetrics import rmse
-###############################################################################
-def printParametersToString(pars):
-        txt = r''
-        for key, value in pars.items():
-            if key== 'algorithm' :
-                txt += "{0} = {1}".format(key, value.__name__)
-            elif key == 'input':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            elif key == 'refdata':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            else:
-                txt += "{0} = {1}".format(key, value)
-            txt += '\n'
-        return txt
-###############################################################################
-#%%
-filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-
-# read image
-Im = plt.imread(filename)                     
-Im = np.asarray(Im, dtype='float32')
-
-Im = Im/255
-perc = 0.05
-u0 = Im + np.random.normal(loc = 0 ,
-                                  scale = perc * Im , 
-                                  size = np.shape(Im))
-u_ref = Im + np.random.normal(loc = 0 ,
-                                  scale = 0.01 * Im , 
-                                  size = np.shape(Im))
-(N,M) = np.shape(u0)
-# map the u0 u0->u0>0
-# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-u0 = u0.astype('float32')
-u_ref = u_ref.astype('float32')
-"""
-M = M-100
-u_ref2 = np.zeros([N,M],dtype='float32')
-u_ref2[:,0:M] = u_ref[:,0:M]
-u_ref = u_ref2
-del u_ref2
-
-u02 = np.zeros([N,M],dtype='float32')
-u02[:,0:M] = u0[:,0:M]
-u0 = u02
-del u02
-
-Im2 = np.zeros([N,M],dtype='float32')
-Im2[:,0:M] = Im[:,0:M]
-Im = Im2
-del Im2
-"""
-#%%
-
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________ROF-TV regulariser_____________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of the ROF-TV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm': ROF_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04,\
-        'number_of_iterations': 1200,\
-        'time_marching_parameter': 0.0025
-        }
-print ("##############ROF TV GPU##################")
-start_time = timeit.default_timer()
-rof_gpu = ROF_TV(pars['input'], 
-                     pars['regularisation_parameter'],
-                     pars['number_of_iterations'], 
-                     pars['time_marching_parameter'],'gpu')
-                     
-rms = rmse(Im, rof_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = ROF_TV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(rof_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________FGP-TV regulariser_____________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of the FGP-TV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :1200 ,\
-        'tolerance_constant':1e-06,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-
-print ("##############FGP TV GPU##################")
-start_time = timeit.default_timer()
-fgp_gpu = FGP_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'gpu')
-                                   
-rms = rmse(Im, fgp_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = FGP_TV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________SB-TV regulariser______________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of the SB-TV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : SB_TV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :150 ,\
-        'tolerance_constant':1e-06,\
-        'methodTV': 0 ,\
-        'printingOut': 0 
-        }
-
-print ("##############SB TV GPU##################")
-start_time = timeit.default_timer()
-sb_gpu = SB_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['printingOut'],'gpu')
-                                   
-rms = rmse(Im, sb_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = SB_TV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(sb_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-#%%
-
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_____Total Generalised Variation (2D)______")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of TGV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : TGV, \
-        'input' : u0,\
-        'regularisation_parameter':0.04, \
-        'alpha1':1.0,\
-        'alpha0':2.0,\
-        'number_of_iterations' :1250 ,\
-        'LipshitzConstant' :12 ,\
-        }
-        
-print ("#############TGV CPU####################")
-start_time = timeit.default_timer()
-tgv_gpu = TGV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['alpha1'],
-              pars['alpha0'],
-              pars['number_of_iterations'],
-              pars['LipshitzConstant'],'gpu')  
-             
-             
-rms = rmse(Im, tgv_gpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tgv_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("______________LLT- ROF (2D)________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of LLT-ROF regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : LLT_ROF, \
-        'input' : u0,\
-        'regularisation_parameterROF':0.04, \
-        'regularisation_parameterLLT':0.01, \
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter' :0.0025 ,\
-        }
-        
-print ("#############LLT- ROF GPU####################")
-start_time = timeit.default_timer()
-lltrof_gpu = LLT_ROF(pars['input'], 
-              pars['regularisation_parameterROF'],
-              pars['regularisation_parameterLLT'],
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'gpu')
-             
-             
-rms = rmse(Im, lltrof_gpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(lltrof_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________NDF regulariser_____________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of the NDF regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF, \
-        'input' : u0,\
-        'regularisation_parameter':0.025, \
-        'edge_parameter':0.015,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.025,\
-        'penalty_type':  1
-        }
-
-print ("##############NDF GPU##################")
-start_time = timeit.default_timer()
-ndf_gpu = NDF(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'],'gpu')  
-             
-rms = rmse(Im, ndf_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = NDF
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Anisotropic Diffusion 4th Order (2D)____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of Diff4th regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : Diff4th, \
-        'input' : u0,\
-        'regularisation_parameter':3.5, \
-        'edge_parameter':0.02,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.0015
-        }
-        
-print ("#############DIFF4th CPU################")
-start_time = timeit.default_timer()
-diff4_gpu = Diff4th(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'gpu')
-             
-rms = rmse(Im, diff4_gpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(diff4_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Nonlocal patches pre-calculation____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-start_time = timeit.default_timer()
-# set parameters
-pars = {'algorithm' : PatchSelect, \
-        'input' : u0,\
-        'searchwindow': 7, \
-        'patchwindow': 2,\
-        'neighbours' : 15 ,\
-        'edge_parameter':0.18}
-
-H_i, H_j, Weights = PatchSelect(pars['input'], 
-              pars['searchwindow'],
-              pars['patchwindow'], 
-              pars['neighbours'],
-              pars['edge_parameter'],'gpu')
-              
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-"""
-plt.figure()
-plt.imshow(Weights[0,:,:],cmap="gray",interpolation="nearest",vmin=0, vmax=1)
-plt.show()
-"""
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Nonlocal Total Variation penalty____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of NLTV regulariser using the CPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-pars2 = {'algorithm' : NLTV, \
-        'input' : u0,\
-        'H_i': H_i, \
-        'H_j': H_j,\
-        'H_k' : 0,\
-        'Weights' : Weights,\
-        'regularisation_parameter': 0.02,\
-        'iterations': 3
-        }
-start_time = timeit.default_timer()
-nltv_cpu = NLTV(pars2['input'], 
-              pars2['H_i'],
-              pars2['H_j'], 
-              pars2['H_k'],
-              pars2['Weights'],
-              pars2['regularisation_parameter'],
-              pars2['iterations'])
-
-rms = rmse(Im, nltv_cpu)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(nltv_cpu, cmap="gray")
-plt.title('{}'.format('CPU results'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("____________FGP-dTV bench___________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of the FGP-dTV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(u0,cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_dTV, \
-        'input' : u0,\
-        'refdata' : u_ref,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :2000 ,\
-        'tolerance_constant':1e-06,\
-        'eta_const':0.2,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-
-print ("##############FGP dTV GPU##################")
-start_time = timeit.default_timer()
-fgp_dtv_gpu = FGP_dTV(pars['input'], 
-              pars['refdata'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['eta_const'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'gpu')
-                                   
-rms = rmse(Im, fgp_dtv_gpu)
-pars['rmse'] = rms
-pars['algorithm'] = FGP_dTV
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_dtv_gpu, cmap="gray")
-plt.title('{}'.format('GPU results'))
diff --git a/docs/demos/demo_gpu_regularisers3D.py b/docs/demos/demo_gpu_regularisers3D.py
deleted file mode 100644
index b6058d2..0000000
--- a/docs/demos/demo_gpu_regularisers3D.py
+++ /dev/null
@@ -1,460 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Feb 22 11:39:43 2018
-
-Demonstration of GPU regularisers
-
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
-from qualitymetrics import rmse
-###############################################################################
-def printParametersToString(pars):
-        txt = r''
-        for key, value in pars.items():
-            if key== 'algorithm' :
-                txt += "{0} = {1}".format(key, value.__name__)
-            elif key == 'input':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            elif key == 'refdata':
-                txt += "{0} = {1}".format(key, np.shape(value))
-            else:
-                txt += "{0} = {1}".format(key, value)
-            txt += '\n'
-        return txt
-###############################################################################
-#%%
-filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-
-# read image
-Im = plt.imread(filename)                     
-Im = np.asarray(Im, dtype='float32')
-
-Im = Im/255
-perc = 0.05
-u0 = Im + np.random.normal(loc = 0 ,
-                                  scale = perc * Im , 
-                                  size = np.shape(Im))
-u_ref = Im + np.random.normal(loc = 0 ,
-                                  scale = 0.01 * Im , 
-                                  size = np.shape(Im))
-(N,M) = np.shape(u0)
-# map the u0 u0->u0>0
-# f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
-u0 = u0.astype('float32')
-u_ref = u_ref.astype('float32')
-"""
-M = M-100
-u_ref2 = np.zeros([N,M],dtype='float32')
-u_ref2[:,0:M] = u_ref[:,0:M]
-u_ref = u_ref2
-del u_ref2
-
-u02 = np.zeros([N,M],dtype='float32')
-u02[:,0:M] = u0[:,0:M]
-u0 = u02
-del u02
-
-Im2 = np.zeros([N,M],dtype='float32')
-Im2[:,0:M] = Im[:,0:M]
-Im = Im2
-del Im2
-"""
-
-
-slices = 20
-
-filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
-Im = plt.imread(filename)
-Im = np.asarray(Im, dtype='float32')
-
-Im = Im/255
-perc = 0.05
-
-noisyVol = np.zeros((slices,N,N),dtype='float32')
-noisyRef = np.zeros((slices,N,N),dtype='float32')
-idealVol = np.zeros((slices,N,N),dtype='float32')
-
-for i in range (slices):
-    noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
-    noisyRef[i,:,:] = Im + np.random.normal(loc = 0 , scale = 0.01 * Im , size = np.shape(Im))
-    idealVol[i,:,:] = Im
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________ROF-TV (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of ROF-TV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy 15th slice of a volume')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm': ROF_TV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04,\
-        'number_of_iterations': 500,\
-        'time_marching_parameter': 0.0025        
-        }
-print ("#############ROF TV GPU####################")
-start_time = timeit.default_timer()
-rof_gpu3D = ROF_TV(pars['input'],
-             pars['regularisation_parameter'],
-             pars['number_of_iterations'],
-             pars['time_marching_parameter'],'gpu')
-rms = rmse(idealVol, rof_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(rof_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using ROF-TV'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________FGP-TV (3D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of FGP-TV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_TV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :300 ,\
-        'tolerance_constant':0.00001,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-
-print ("#############FGP TV GPU####################")
-start_time = timeit.default_timer()
-fgp_gpu3D = FGP_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'gpu')
-
-rms = rmse(idealVol, fgp_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using FGP-TV'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________SB-TV (3D)__________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of SB-TV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : SB_TV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :100 ,\
-        'tolerance_constant':1e-05,\
-        'methodTV': 0 ,\
-        'printingOut': 0 
-        }
-
-print ("#############SB TV GPU####################")
-start_time = timeit.default_timer()
-sb_gpu3D = SB_TV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['methodTV'],
-              pars['printingOut'],'gpu')
-
-rms = rmse(idealVol, sb_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(sb_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using SB-TV'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________LLT-ROF (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of LLT-ROF regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : LLT_ROF, \
-        'input' : noisyVol,\
-        'regularisation_parameterROF':0.04, \
-        'regularisation_parameterLLT':0.015, \
-        'number_of_iterations' :300 ,\
-        'time_marching_parameter' :0.0025 ,\
-        }
-
-print ("#############LLT ROF CPU####################")
-start_time = timeit.default_timer()
-lltrof_gpu3D = LLT_ROF(pars['input'], 
-              pars['regularisation_parameterROF'],
-              pars['regularisation_parameterLLT'],
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'gpu')
-
-rms = rmse(idealVol, lltrof_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(lltrof_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using LLT-ROF'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________TGV (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of TGV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : TGV, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.04, \
-        'alpha1':1.0,\
-        'alpha0':2.0,\
-        'number_of_iterations' :600 ,\
-        'LipshitzConstant' :12 ,\
-        }
-
-print ("#############TGV GPU####################")
-start_time = timeit.default_timer()
-tgv_gpu3D = TGV(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['alpha1'],
-              pars['alpha0'],
-              pars['number_of_iterations'],
-              pars['LipshitzConstant'],'gpu')
-             
-
-rms = rmse(idealVol, tgv_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(tgv_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using TGV'))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________NDF-TV (3D)_________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of NDF regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : NDF, \
-        'input' : noisyVol,\
-        'regularisation_parameter':0.025, \
-        'edge_parameter':0.015,\
-        'number_of_iterations' :500 ,\
-        'time_marching_parameter':0.025,\
-        'penalty_type':  1
-        }
-
-print ("#############NDF GPU####################")
-start_time = timeit.default_timer()
-ndf_gpu3D = NDF(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'], 
-              pars['penalty_type'],'gpu')
-
-rms = rmse(idealVol, ndf_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(ndf_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using NDF'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("___Anisotropic Diffusion 4th Order (3D)____")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of DIFF4th regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : Diff4th, \
-        'input' : noisyVol,\
-        'regularisation_parameter':3.5, \
-        'edge_parameter':0.02,\
-        'number_of_iterations' :300 ,\
-        'time_marching_parameter':0.0015
-        }
-        
-print ("#############DIFF4th CPU################")
-start_time = timeit.default_timer()
-diff4_gpu3D = Diff4th(pars['input'], 
-              pars['regularisation_parameter'],
-              pars['edge_parameter'], 
-              pars['number_of_iterations'],
-              pars['time_marching_parameter'],'gpu')
-             
-rms = rmse(idealVol, diff4_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(diff4_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('GPU results'))
-
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("_______________FGP-dTV (3D)________________")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-
-## plot 
-fig = plt.figure()
-plt.suptitle('Performance of FGP-dTV regulariser using the GPU')
-a=fig.add_subplot(1,2,1)
-a.set_title('Noisy Image')
-imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
-
-# set parameters
-pars = {'algorithm' : FGP_dTV, \
-        'input' : noisyVol,\
-        'refdata' : noisyRef,\
-        'regularisation_parameter':0.04, \
-        'number_of_iterations' :300 ,\
-        'tolerance_constant':0.00001,\
-        'eta_const':0.2,\
-        'methodTV': 0 ,\
-        'nonneg': 0 ,\
-        'printingOut': 0 
-        }
-
-print ("#############FGP TV GPU####################")
-start_time = timeit.default_timer()
-fgp_dTV_gpu3D = FGP_dTV(pars['input'],
-              pars['refdata'], 
-              pars['regularisation_parameter'],
-              pars['number_of_iterations'],
-              pars['tolerance_constant'], 
-              pars['eta_const'],
-              pars['methodTV'],
-              pars['nonneg'],
-              pars['printingOut'],'gpu')
-
-rms = rmse(idealVol, fgp_dTV_gpu3D)
-pars['rmse'] = rms
-
-txtstr = printParametersToString(pars)
-txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
-print (txtstr)
-a=fig.add_subplot(1,2,2)
-
-# these are matplotlib.patch.Patch properties
-props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
-# place a text box in upper left in axes coords
-a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
-         verticalalignment='top', bbox=props)
-imgplot = plt.imshow(fgp_dTV_gpu3D[10,:,:], cmap="gray")
-plt.title('{}'.format('Recovered volume on the GPU using FGP-dTV'))
-#%%
diff --git a/docs/demos/qualitymetrics.py b/docs/demos/qualitymetrics.py
deleted file mode 100644
index 850829e..0000000
--- a/docs/demos/qualitymetrics.py
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Feb 21 13:34:32 2018
-# quality metrics
-@authors: Daniil Kazantsev, Edoardo Pasca
-"""
-import numpy as np
-
-def nrmse(im1, im2):
-    rmse = np.sqrt(np.sum((im2 - im1) ** 2) / float(im1.size))
-    max_val = max(np.max(im1), np.max(im2))
-    min_val = min(np.min(im1), np.min(im2))
-    return 1 - (rmse / (max_val - min_val))
-    
-def rmse(im1, im2):
-    rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size))
-    return rmse
diff --git a/docs/images/TV_vs_NLTV.jpg b/docs/images/TV_vs_NLTV.jpg
deleted file mode 100644
index e976512..0000000
Binary files a/docs/images/TV_vs_NLTV.jpg and /dev/null differ
diff --git a/docs/images/probl.pdf b/docs/images/probl.pdf
deleted file mode 100644
index 6a06021..0000000
Binary files a/docs/images/probl.pdf and /dev/null differ
diff --git a/docs/images/probl.png b/docs/images/probl.png
deleted file mode 100644
index af0e852..0000000
Binary files a/docs/images/probl.png and /dev/null differ
diff --git a/docs/images/reg_penalties.jpg b/docs/images/reg_penalties.jpg
deleted file mode 100644
index 923d5c4..0000000
Binary files a/docs/images/reg_penalties.jpg and /dev/null differ
diff --git a/docs/installation.txt b/docs/installation.txt
deleted file mode 100644
index f6db38c..0000000
--- a/docs/installation.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-One can install CCPi-RGL toolkit using cmake:
-
-
-cmake ../CCPi-Regularisation-Toolkit/ -DBUILD_MATLAB_WRAPPERS=ON -DBUILD_PYTHON_WRAPPERS=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DMatlab_ROOT_DIR=<Matlab directory> -DBUILD_CUDA=OFF
-
-make 
-
-make install 
-
-Running Matlab from Linux do:
-PATH="/path/to/mex/:$PATH" LD_LIBRARY_PATH="/path/to/library:$LD_LIBRARY_PATH" ./matlab -nosplash &
diff --git a/test/test_CPU_regularisers.py b/test/test_CPU_regularisers.py
new file mode 100644
index 0000000..42e4735
--- /dev/null
+++ b/test/test_CPU_regularisers.py
@@ -0,0 +1,91 @@
+import unittest
+import math
+import os
+import timeit
+from ccpi.filters.regularisers import FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th, ROF_TV
+from testroutines import *
+
+###############################################################################
+
+class TestRegularisers(unittest.TestCase):
+
+    def getPars(self,alg,noi=1200):
+        filename = os.path.join("lena_gray_512.tif")
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)
+        Im = np.asarray(Im, dtype='float32')
+        Im = Im / 255
+        perc = 0.05
+        u0 = Im + np.random.normal(loc=0,
+                                   scale=perc * Im,
+                                   size=np.shape(Im))
+        u_ref = Im + np.random.normal(loc=0,
+                                      scale=0.01 * Im,
+                                      size=np.shape(Im))
+        # map the u0 u0->u0>0
+        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+        u0 = u0.astype('float32')
+        u_ref = u_ref.astype('float32')
+        # set parameters
+        pars = {'algorithm': alg, \
+                'input': u0, \
+                'regularisation_parameter': 0.04, \
+                'number_of_iterations': noi, \
+                'tolerance_constant': 0.00001, \
+                'methodTV': 0, \
+                'nonneg': 0, \
+                'printingOut': 0, \
+                'time_marching_parameter': 0.00002
+                }
+        return Im, pars
+
+
+    def test_FGP_TV_CPU(self):
+        Im, pars = self.getPars(FGP_TV)
+
+        fgp_cpu = FGP_TV(pars['input'],
+                         pars['regularisation_parameter'],
+                         pars['number_of_iterations'],
+                         pars['tolerance_constant'],
+                         pars['methodTV'],
+                         pars['nonneg'],
+                         pars['printingOut'], 'cpu')
+
+        rms = rmse(Im, fgp_cpu)
+        pars['rmse'] = rms
+        self.assertAlmostEqual(rms,0.02,delta=0.01)
+
+    def test_TV_ROF_CPU(self):
+        # set parameters
+        Im, pars = self.getPars(ROF_TV)
+        # call routine
+        fgp_cpu = ROF_TV(pars['input'],
+                         pars['regularisation_parameter'],
+                         pars['number_of_iterations'],
+                         pars['time_marching_parameter'], 'cpu')
+
+        rms = rmse(Im, fgp_cpu)
+        pars['rmse'] = rms
+
+        #txtstr = printParametersToString(pars)
+        #print(txtstr)
+        # now test that it generates some expected output
+        self.assertAlmostEqual(rms,0.02,delta=0.01)
+
+    def test_SB_TV_CPU(self):
+        # set parameters
+        Im, pars = self.getPars(SB_TV)
+        # call routine
+        fgp_cpu = SB_TV(pars['input'],
+                         pars['regularisation_parameter'],
+                         pars['number_of_iterations'],
+                         pars['time_marching_parameter'], 'cpu')
+
+        rms = rmse(Im, fgp_cpu)
+        pars['rmse'] = rms
+
+        #txtstr = printParametersToString(pars)
+        #print(txtstr)
+        # now test that it generates some expected output
+        self.assertAlmostEqual(rms,0.02,delta=0.01)
diff --git a/test/test_FGP_TV.py b/test/test_FGP_TV.py
new file mode 100644
index 0000000..f0dc540
--- /dev/null
+++ b/test/test_FGP_TV.py
@@ -0,0 +1,152 @@
+import unittest
+import math
+import os
+import timeit
+from ccpi.filters.regularisers import FGP_TV
+#, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th
+from testroutines import *
+
+###############################################################################
+
+class TestRegularisers(unittest.TestCase):
+
+    def test_FGP_TV_CPU(self):
+        print(__name__)
+        filename = os.path.join("lena_gray_512.tif")
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)
+        Im = np.asarray(Im, dtype='float32')
+
+        Im = Im / 255
+        perc = 0.05
+        u0 = Im + np.random.normal(loc=0,
+                                   scale=perc * Im,
+                                   size=np.shape(Im))
+        u_ref = Im + np.random.normal(loc=0,
+                                      scale=0.01 * Im,
+                                      size=np.shape(Im))
+
+        # map the u0 u0->u0>0
+        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+        u0 = u0.astype('float32')
+        u_ref = u_ref.astype('float32')
+
+        print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print("____________FGP-TV bench___________________")
+        print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+        # set parameters
+        pars = {'algorithm': FGP_TV, \
+                'input': u0, \
+                'regularisation_parameter': 0.04, \
+                'number_of_iterations': 1200, \
+                'tolerance_constant': 0.00001, \
+                'methodTV': 0, \
+                'nonneg': 0, \
+                'printingOut': 0
+                }
+
+        print("#############FGP TV CPU####################")
+        start_time = timeit.default_timer()
+        fgp_cpu = FGP_TV(pars['input'],
+                         pars['regularisation_parameter'],
+                         pars['number_of_iterations'],
+                         pars['tolerance_constant'],
+                         pars['methodTV'],
+                         pars['nonneg'],
+                         pars['printingOut'], 'cpu')
+
+        rms = rmse(Im, fgp_cpu)
+        pars['rmse'] = rms
+
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time', timeit.default_timer() - start_time)
+        print(txtstr)
+        self.assertTrue(math.isclose(rms,0.02,rel_tol=1e-1))
+
+    def test_FGP_TV_CPU_vs_GPU(self):
+        print(__name__)
+        filename = os.path.join("lena_gray_512.tif")
+        plt = TiffReader()
+        # read image
+        Im = plt.imread(filename)
+        Im = np.asarray(Im, dtype='float32')
+
+        Im = Im / 255
+        perc = 0.05
+        u0 = Im + np.random.normal(loc=0,
+                                   scale=perc * Im,
+                                   size=np.shape(Im))
+        u_ref = Im + np.random.normal(loc=0,
+                                      scale=0.01 * Im,
+                                      size=np.shape(Im))
+
+        # map the u0 u0->u0>0
+        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
+        u0 = u0.astype('float32')
+        u_ref = u_ref.astype('float32')
+
+        print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+        print("____________FGP-TV bench___________________")
+        print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+        # set parameters
+        pars = {'algorithm': FGP_TV, \
+                'input': u0, \
+                'regularisation_parameter': 0.04, \
+                'number_of_iterations': 1200, \
+                'tolerance_constant': 0.00001, \
+                'methodTV': 0, \
+                'nonneg': 0, \
+                'printingOut': 0
+                }
+
+        print("#############FGP TV CPU####################")
+        start_time = timeit.default_timer()
+        fgp_cpu = FGP_TV(pars['input'],
+                         pars['regularisation_parameter'],
+                         pars['number_of_iterations'],
+                         pars['tolerance_constant'],
+                         pars['methodTV'],
+                         pars['nonneg'],
+                         pars['printingOut'], 'cpu')
+
+        rms = rmse(Im, fgp_cpu)
+        pars['rmse'] = rms
+
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time', timeit.default_timer() - start_time)
+        print(txtstr)
+
+        print("##############FGP TV GPU##################")
+        start_time = timeit.default_timer()
+        try:
+            fgp_gpu = FGP_TV(pars['input'],
+                             pars['regularisation_parameter'],
+                             pars['number_of_iterations'],
+                             pars['tolerance_constant'],
+                             pars['methodTV'],
+                             pars['nonneg'],
+                             pars['printingOut'], 'gpu')
+
+        except ValueError as ve:
+            self.skipTest("Results not comparable. GPU computing error.")
+
+        rms = rmse(Im, fgp_gpu)
+        pars['rmse'] = rms
+        pars['algorithm'] = FGP_TV
+        txtstr = printParametersToString(pars)
+        txtstr += "%s = %.3fs" % ('elapsed time', timeit.default_timer() - start_time)
+        print(txtstr)
+
+        print("--------Compare the results--------")
+        tolerance = 1e-05
+        diff_im = np.zeros(np.shape(fgp_cpu))
+        diff_im = abs(fgp_cpu - fgp_gpu)
+        diff_im[diff_im > tolerance] = 1
+
+        self.assertLessEqual(diff_im.sum(), 1)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_ROF_TV.py b/test/test_ROF_TV.py
index dda38b7..fa35680 100644
--- a/test/test_ROF_TV.py
+++ b/test/test_ROF_TV.py
@@ -53,9 +53,6 @@ class TestRegularisers(unittest.TestCase):
 
 
     def test_ROF_TV_CPU_vs_GPU(self):
-        # print ("tomas debug test function")
-        print(__name__)
-        self.fail("testfail2")
         filename = os.path.join("lena_gray_512.tif")
         plt = TiffReader()
         # read image
-- 
cgit v1.2.3


From c237d292999c93df09ca3679876d225896dd0ff9 Mon Sep 17 00:00:00 2001
From: Daniil Kazantsev <dkazanc@hotmail.com>
Date: Fri, 22 Feb 2019 12:41:28 +0000
Subject: updated readme

---
 Readme.md | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/Readme.md b/Readme.md
index 112d606..187f8ac 100644
--- a/Readme.md
+++ b/Readme.md
@@ -9,15 +9,15 @@
 **Iterative image reconstruction (IIR) methods normally require regularisation to stabilise the convergence and make the reconstruction problem (inverse problem) more well-posed. The CCPi-RGL software provides 2D/3D and multi-channel regularisation strategies to ensure better performance of IIR methods. The regularisation modules are well-suited to use with [splitting algorithms](https://en.wikipedia.org/wiki/Augmented_Lagrangian_method#Alternating_direction_method_of_multipliers), such as, [ADMM](https://github.com/dkazanc/ADMM-tomo) and [FISTA](https://github.com/dkazanc/FISTA-tomo). Furthermore, the toolkit can be used for simpler inversion tasks, such as, image denoising, inpaiting, deconvolution etc. The core modules are written in C-OMP and CUDA languages and wrappers for Matlab and Python are provided.** 
 
 <div align="center">
-  <img src="docs/images/probl.png" height="225"><br>  
+  <img src="demos/images/probl.png" height="225"><br>  
 </div>
 
 <div align="center">
-  <img src="docs/images/reg_penalties.jpg" height="450"><br>  
+  <img src="demos/images/reg_penalties.jpg" height="450"><br>  
 </div>
 
 <div align="center">
-  <img src="docs/images/TV_vs_NLTV.jpg" height="300"><br>  
+  <img src="demos/images/TV_vs_NLTV.jpg" height="300"><br>  
 </div>
 
 ## Prerequisites: 
@@ -183,8 +183,7 @@ addpath(/path/to/library);
 
 ### Applications:
 
-* [Regularised FISTA iterative reconstruction algorithm for X-ray tomographic reconstruction with highly inaccurate measurements (MATLAB/Python code)](https://github.com/dkazanc/FISTA-tomo)
-* [Regularised ADMM iterative reconstruction algorithm for X-ray tomographic reconstruction (MATLAB code)](https://github.com/dkazanc/ADMM-tomo)
+* [A library of tomographic reconstruction methods: direct and model-based iterative (MATLAB/Python code)](https://github.com/dkazanc/TomoRec)
 * [Joint image reconstruction method with correlative multi-channel prior for X-ray spectral computed tomography (MATLAB code)](https://github.com/dkazanc/multi-channel-X-ray-CT)
 
 ### License:
-- 
cgit v1.2.3


From 5a2fd376130ea2c7c4ac1704bc9d2f087522855d Mon Sep 17 00:00:00 2001
From: Tomas Kulhanek <tomas.kulhanek@stfc.ac.uk>
Date: Fri, 22 Feb 2019 08:10:48 -0500
Subject: UPDATE:test and pycharm project files

---
 .idea/CCPi-Regularisation-Toolkit.iml | 16 ++++++++++++++++
 .idea/encodings.xml                   |  4 ++++
 .idea/misc.xml                        |  7 +++++++
 .idea/modules.xml                     |  8 ++++++++
 .idea/vcs.xml                         |  6 ++++++
 test/test_CPU_regularisers.py         |  5 +++++
 6 files changed, 46 insertions(+)
 create mode 100644 .idea/CCPi-Regularisation-Toolkit.iml
 create mode 100644 .idea/encodings.xml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/vcs.xml

diff --git a/.idea/CCPi-Regularisation-Toolkit.iml b/.idea/CCPi-Regularisation-Toolkit.iml
new file mode 100644
index 0000000..c02bd4f
--- /dev/null
+++ b/.idea/CCPi-Regularisation-Toolkit.iml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/test" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/Python/src" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/Python/ccpi" isTestSource="false" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.6 (py3)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
new file mode 100644
index 0000000..15a15b2
--- /dev/null
+++ b/.idea/encodings.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Encoding" addBOMForNewFiles="with NO BOM" />
+</project>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..c078c5c
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (py3)" project-jdk-type="Python SDK" />
+  <component name="PyCharmProfessionalAdvertiser">
+    <option name="shown" value="true" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..e00e88e
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/CCPi-Regularisation-Toolkit.iml" filepath="$PROJECT_DIR$/.idea/CCPi-Regularisation-Toolkit.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/test/test_CPU_regularisers.py b/test/test_CPU_regularisers.py
index 8940926..552e64e 100644
--- a/test/test_CPU_regularisers.py
+++ b/test/test_CPU_regularisers.py
@@ -126,3 +126,8 @@ class TestRegularisers(unittest.TestCase):
 
         # now test that it generates some expected output
         self.assertAlmostEqual(rms, 0.02, delta=0.01)
+
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
cgit v1.2.3


From 606d503f828777b11df4e959d6a5b25b4a86e0c7 Mon Sep 17 00:00:00 2001
From: Daniil Kazantsev <dkazanc@hotmail.com>
Date: Sat, 23 Feb 2019 21:49:04 +0000
Subject: supp created, pillow dependency corrected in yaml

---
 Readme.md                                   |  2 +-
 Wrappers/Python/ccpi/supp/__init__.py       |  0
 Wrappers/Python/ccpi/supp/qualitymetrics.py | 65 -----------------------------
 recipe/meta.yaml                            |  3 +-
 src/Python/ccpi/supp/__init__.py            |  0
 src/Python/ccpi/supp/qualitymetrics.py      | 65 +++++++++++++++++++++++++++++
 6 files changed, 68 insertions(+), 67 deletions(-)
 delete mode 100644 Wrappers/Python/ccpi/supp/__init__.py
 delete mode 100644 Wrappers/Python/ccpi/supp/qualitymetrics.py
 create mode 100644 src/Python/ccpi/supp/__init__.py
 create mode 100644 src/Python/ccpi/supp/qualitymetrics.py

diff --git a/Readme.md b/Readme.md
index 3a39066..92b4273 100644
--- a/Readme.md
+++ b/Readme.md
@@ -110,7 +110,7 @@ conda install ccpi-regulariser -c ccpi -c conda-forge
 #### Python (conda-build)
 ```
 	export CIL_VERSION=19.02
-	conda build Wrappers/Python/conda-recipe --numpy 1.12 --python 3.5 
+	conda build recipe/ --numpy 1.12 --python 3.5  
 	conda install ccpi-regulariser=${CIL_VERSION} --use-local --force
 	cd demos/
 	python demo_cpu_regularisers.py # to run CPU demo
diff --git a/Wrappers/Python/ccpi/supp/__init__.py b/Wrappers/Python/ccpi/supp/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/Wrappers/Python/ccpi/supp/qualitymetrics.py b/Wrappers/Python/ccpi/supp/qualitymetrics.py
deleted file mode 100644
index f44d832..0000000
--- a/Wrappers/Python/ccpi/supp/qualitymetrics.py
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
-"""
-A class for some standard image quality metrics
-"""
-import numpy as np
-
-class QualityTools:
-    def __init__(self, im1, im2):
-        if im1.size != im2.size:
-            print ('Error: Sizes of images/volumes are different')
-            raise SystemExit
-        self.im1 = im1 # image or volume - 1
-        self.im2 = im2 # image or volume - 2
-    def nrmse(self):
-        """ Normalised Root Mean Square Error """
-        rmse = np.sqrt(np.sum((self.im2 - self.im1) ** 2) / float(self.im1.size))
-        max_val = max(np.max(self.im1), np.max(self.im2))
-        min_val = min(np.min(self.im1), np.min(self.im2))
-        return 1 - (rmse / (max_val - min_val))
-    def rmse(self):
-        """ Root Mean Square Error """
-        rmse = np.sqrt(np.sum((self.im1 - self.im2) ** 2) / float(self.im1.size))
-        return rmse
-    def ssim(self, window, k=(0.01, 0.03), l=255):
-        from scipy.signal import fftconvolve
-        """See https://ece.uwaterloo.ca/~z70wang/research/ssim/"""
-        # Check if the window is smaller than the images.
-        for a, b in zip(window.shape, self.im1.shape):
-            if a > b:
-                return None, None
-        # Values in k must be positive according to the base implementation.
-        for ki in k:
-            if ki < 0:
-                return None, None
-    
-        c1 = (k[0] * l) ** 2
-        c2 = (k[1] * l) ** 2
-        window = window/np.sum(window)
-    
-        mu1 = fftconvolve(self.im1, window, mode='valid')
-        mu2 = fftconvolve(self.im2, window, mode='valid')
-        mu1_sq = mu1 * mu1
-        mu2_sq = mu2 * mu2
-        mu1_mu2 = mu1 * mu2
-        sigma1_sq = fftconvolve(self.im1 * self.im1, window, mode='valid') - mu1_sq
-        sigma2_sq = fftconvolve(self.im2 * self.im2, window, mode='valid') - mu2_sq
-        sigma12 = fftconvolve(self.im1 * self.im2, window, mode='valid') - mu1_mu2
-    
-        if c1 > 0 and c2 > 0:
-            num = (2 * mu1_mu2 + c1) * (2 * sigma12 + c2)
-            den = (mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2)
-            ssim_map = num / den
-        else:
-            num1 = 2 * mu1_mu2 + c1
-            num2 = 2 * sigma12 + c2
-            den1 = mu1_sq + mu2_sq + c1
-            den2 = sigma1_sq + sigma2_sq + c2
-            ssim_map = np.ones(np.shape(mu1))
-            index = (den1 * den2) > 0
-            ssim_map[index] = (num1[index] * num2[index]) / (den1[index] * den2[index])
-            index = (den1 != 0) & (den2 == 0)
-            ssim_map[index] = num1[index] / den1[index]    
-        mssim = ssim_map.mean()
-        return mssim, ssim_map
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 61d17bd..527ad32 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -12,7 +12,8 @@ test:
   files:
     - ../test/lena_gray_512.tif
   requires:
-    - pillow=4.1.1
+    - pillow
+    - pillow=4.1.1 # [win]
 
 requirements:
   build:
diff --git a/src/Python/ccpi/supp/__init__.py b/src/Python/ccpi/supp/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/Python/ccpi/supp/qualitymetrics.py b/src/Python/ccpi/supp/qualitymetrics.py
new file mode 100644
index 0000000..f44d832
--- /dev/null
+++ b/src/Python/ccpi/supp/qualitymetrics.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+A class for some standard image quality metrics
+"""
+import numpy as np
+
+class QualityTools:
+    def __init__(self, im1, im2):
+        if im1.size != im2.size:
+            print ('Error: Sizes of images/volumes are different')
+            raise SystemExit
+        self.im1 = im1 # image or volume - 1
+        self.im2 = im2 # image or volume - 2
+    def nrmse(self):
+        """ Normalised Root Mean Square Error """
+        rmse = np.sqrt(np.sum((self.im2 - self.im1) ** 2) / float(self.im1.size))
+        max_val = max(np.max(self.im1), np.max(self.im2))
+        min_val = min(np.min(self.im1), np.min(self.im2))
+        return 1 - (rmse / (max_val - min_val))
+    def rmse(self):
+        """ Root Mean Square Error """
+        rmse = np.sqrt(np.sum((self.im1 - self.im2) ** 2) / float(self.im1.size))
+        return rmse
+    def ssim(self, window, k=(0.01, 0.03), l=255):
+        from scipy.signal import fftconvolve
+        """See https://ece.uwaterloo.ca/~z70wang/research/ssim/"""
+        # Check if the window is smaller than the images.
+        for a, b in zip(window.shape, self.im1.shape):
+            if a > b:
+                return None, None
+        # Values in k must be positive according to the base implementation.
+        for ki in k:
+            if ki < 0:
+                return None, None
+    
+        c1 = (k[0] * l) ** 2
+        c2 = (k[1] * l) ** 2
+        window = window/np.sum(window)
+    
+        mu1 = fftconvolve(self.im1, window, mode='valid')
+        mu2 = fftconvolve(self.im2, window, mode='valid')
+        mu1_sq = mu1 * mu1
+        mu2_sq = mu2 * mu2
+        mu1_mu2 = mu1 * mu2
+        sigma1_sq = fftconvolve(self.im1 * self.im1, window, mode='valid') - mu1_sq
+        sigma2_sq = fftconvolve(self.im2 * self.im2, window, mode='valid') - mu2_sq
+        sigma12 = fftconvolve(self.im1 * self.im2, window, mode='valid') - mu1_mu2
+    
+        if c1 > 0 and c2 > 0:
+            num = (2 * mu1_mu2 + c1) * (2 * sigma12 + c2)
+            den = (mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2)
+            ssim_map = num / den
+        else:
+            num1 = 2 * mu1_mu2 + c1
+            num2 = 2 * sigma12 + c2
+            den1 = mu1_sq + mu2_sq + c1
+            den2 = sigma1_sq + sigma2_sq + c2
+            ssim_map = np.ones(np.shape(mu1))
+            index = (den1 * den2) > 0
+            ssim_map[index] = (num1[index] * num2[index]) / (den1[index] * den2[index])
+            index = (den1 != 0) & (den2 == 0)
+            ssim_map[index] = num1[index] / den1[index]    
+        mssim = ssim_map.mean()
+        return mssim, ssim_map
-- 
cgit v1.2.3


From 047d9e2a7dda92e13414b980a93c3f1724665241 Mon Sep 17 00:00:00 2001
From: Tomas Kulhanek <tomas.kulhanek@stfc.ac.uk>
Date: Mon, 25 Feb 2019 03:35:50 -0500
Subject: MOVE: Wrappers/Python/supp to src/Python/ccpi/supp

---
 Wrappers/Python/ccpi/supp/__init__.py       |  0
 Wrappers/Python/ccpi/supp/qualitymetrics.py | 65 -----------------------------
 src/Python/__init__.py                      |  0
 src/Python/ccpi/supp/__init__.py            |  0
 src/Python/ccpi/supp/qualitymetrics.py      | 65 +++++++++++++++++++++++++++++
 src/__init__.py                             |  0
 test/test_CPU_regularisers.py               |  2 -
 7 files changed, 65 insertions(+), 67 deletions(-)
 delete mode 100644 Wrappers/Python/ccpi/supp/__init__.py
 delete mode 100644 Wrappers/Python/ccpi/supp/qualitymetrics.py
 create mode 100644 src/Python/__init__.py
 create mode 100644 src/Python/ccpi/supp/__init__.py
 create mode 100644 src/Python/ccpi/supp/qualitymetrics.py
 create mode 100644 src/__init__.py

diff --git a/Wrappers/Python/ccpi/supp/__init__.py b/Wrappers/Python/ccpi/supp/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/Wrappers/Python/ccpi/supp/qualitymetrics.py b/Wrappers/Python/ccpi/supp/qualitymetrics.py
deleted file mode 100644
index f44d832..0000000
--- a/Wrappers/Python/ccpi/supp/qualitymetrics.py
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
-"""
-A class for some standard image quality metrics
-"""
-import numpy as np
-
-class QualityTools:
-    def __init__(self, im1, im2):
-        if im1.size != im2.size:
-            print ('Error: Sizes of images/volumes are different')
-            raise SystemExit
-        self.im1 = im1 # image or volume - 1
-        self.im2 = im2 # image or volume - 2
-    def nrmse(self):
-        """ Normalised Root Mean Square Error """
-        rmse = np.sqrt(np.sum((self.im2 - self.im1) ** 2) / float(self.im1.size))
-        max_val = max(np.max(self.im1), np.max(self.im2))
-        min_val = min(np.min(self.im1), np.min(self.im2))
-        return 1 - (rmse / (max_val - min_val))
-    def rmse(self):
-        """ Root Mean Square Error """
-        rmse = np.sqrt(np.sum((self.im1 - self.im2) ** 2) / float(self.im1.size))
-        return rmse
-    def ssim(self, window, k=(0.01, 0.03), l=255):
-        from scipy.signal import fftconvolve
-        """See https://ece.uwaterloo.ca/~z70wang/research/ssim/"""
-        # Check if the window is smaller than the images.
-        for a, b in zip(window.shape, self.im1.shape):
-            if a > b:
-                return None, None
-        # Values in k must be positive according to the base implementation.
-        for ki in k:
-            if ki < 0:
-                return None, None
-    
-        c1 = (k[0] * l) ** 2
-        c2 = (k[1] * l) ** 2
-        window = window/np.sum(window)
-    
-        mu1 = fftconvolve(self.im1, window, mode='valid')
-        mu2 = fftconvolve(self.im2, window, mode='valid')
-        mu1_sq = mu1 * mu1
-        mu2_sq = mu2 * mu2
-        mu1_mu2 = mu1 * mu2
-        sigma1_sq = fftconvolve(self.im1 * self.im1, window, mode='valid') - mu1_sq
-        sigma2_sq = fftconvolve(self.im2 * self.im2, window, mode='valid') - mu2_sq
-        sigma12 = fftconvolve(self.im1 * self.im2, window, mode='valid') - mu1_mu2
-    
-        if c1 > 0 and c2 > 0:
-            num = (2 * mu1_mu2 + c1) * (2 * sigma12 + c2)
-            den = (mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2)
-            ssim_map = num / den
-        else:
-            num1 = 2 * mu1_mu2 + c1
-            num2 = 2 * sigma12 + c2
-            den1 = mu1_sq + mu2_sq + c1
-            den2 = sigma1_sq + sigma2_sq + c2
-            ssim_map = np.ones(np.shape(mu1))
-            index = (den1 * den2) > 0
-            ssim_map[index] = (num1[index] * num2[index]) / (den1[index] * den2[index])
-            index = (den1 != 0) & (den2 == 0)
-            ssim_map[index] = num1[index] / den1[index]    
-        mssim = ssim_map.mean()
-        return mssim, ssim_map
diff --git a/src/Python/__init__.py b/src/Python/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/Python/ccpi/supp/__init__.py b/src/Python/ccpi/supp/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/Python/ccpi/supp/qualitymetrics.py b/src/Python/ccpi/supp/qualitymetrics.py
new file mode 100644
index 0000000..f44d832
--- /dev/null
+++ b/src/Python/ccpi/supp/qualitymetrics.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+A class for some standard image quality metrics
+"""
+import numpy as np
+
+class QualityTools:
+    def __init__(self, im1, im2):
+        if im1.size != im2.size:
+            print ('Error: Sizes of images/volumes are different')
+            raise SystemExit
+        self.im1 = im1 # image or volume - 1
+        self.im2 = im2 # image or volume - 2
+    def nrmse(self):
+        """ Normalised Root Mean Square Error """
+        rmse = np.sqrt(np.sum((self.im2 - self.im1) ** 2) / float(self.im1.size))
+        max_val = max(np.max(self.im1), np.max(self.im2))
+        min_val = min(np.min(self.im1), np.min(self.im2))
+        return 1 - (rmse / (max_val - min_val))
+    def rmse(self):
+        """ Root Mean Square Error """
+        rmse = np.sqrt(np.sum((self.im1 - self.im2) ** 2) / float(self.im1.size))
+        return rmse
+    def ssim(self, window, k=(0.01, 0.03), l=255):
+        from scipy.signal import fftconvolve
+        """See https://ece.uwaterloo.ca/~z70wang/research/ssim/"""
+        # Check if the window is smaller than the images.
+        for a, b in zip(window.shape, self.im1.shape):
+            if a > b:
+                return None, None
+        # Values in k must be positive according to the base implementation.
+        for ki in k:
+            if ki < 0:
+                return None, None
+    
+        c1 = (k[0] * l) ** 2
+        c2 = (k[1] * l) ** 2
+        window = window/np.sum(window)
+    
+        mu1 = fftconvolve(self.im1, window, mode='valid')
+        mu2 = fftconvolve(self.im2, window, mode='valid')
+        mu1_sq = mu1 * mu1
+        mu2_sq = mu2 * mu2
+        mu1_mu2 = mu1 * mu2
+        sigma1_sq = fftconvolve(self.im1 * self.im1, window, mode='valid') - mu1_sq
+        sigma2_sq = fftconvolve(self.im2 * self.im2, window, mode='valid') - mu2_sq
+        sigma12 = fftconvolve(self.im1 * self.im2, window, mode='valid') - mu1_mu2
+    
+        if c1 > 0 and c2 > 0:
+            num = (2 * mu1_mu2 + c1) * (2 * sigma12 + c2)
+            den = (mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2)
+            ssim_map = num / den
+        else:
+            num1 = 2 * mu1_mu2 + c1
+            num2 = 2 * sigma12 + c2
+            den1 = mu1_sq + mu2_sq + c1
+            den2 = sigma1_sq + sigma2_sq + c2
+            ssim_map = np.ones(np.shape(mu1))
+            index = (den1 * den2) > 0
+            ssim_map[index] = (num1[index] * num2[index]) / (den1[index] * den2[index])
+            index = (den1 != 0) & (den2 == 0)
+            ssim_map[index] = num1[index] / den1[index]    
+        mssim = ssim_map.mean()
+        return mssim, ssim_map
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/test_CPU_regularisers.py b/test/test_CPU_regularisers.py
index 552e64e..3a90d49 100644
--- a/test/test_CPU_regularisers.py
+++ b/test/test_CPU_regularisers.py
@@ -127,7 +127,5 @@ class TestRegularisers(unittest.TestCase):
         # now test that it generates some expected output
         self.assertAlmostEqual(rms, 0.02, delta=0.01)
 
-
-
 if __name__ == '__main__':
     unittest.main()
-- 
cgit v1.2.3


From fc941e0941facb9437dab667ba3350db071769da Mon Sep 17 00:00:00 2001
From: Tomas Kulhanek <tomas.kulhanek@stfc.ac.uk>
Date: Mon, 25 Feb 2019 03:58:51 -0500
Subject: UPDATE: unit test

---
 test/test_CPU_regularisers.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/test/test_CPU_regularisers.py b/test/test_CPU_regularisers.py
index 3a90d49..6af4cd4 100644
--- a/test/test_CPU_regularisers.py
+++ b/test/test_CPU_regularisers.py
@@ -23,21 +23,8 @@ class TestRegularisers(unittest.TestCase):
         u_ref = Im + np.random.normal(loc=0,
                                       scale=0.01 * Im,
                                       size=np.shape(Im))
-        # map the u0 u0->u0>0
-        # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
         u0 = u0.astype('float32')
         u_ref = u_ref.astype('float32')
-        # set parameters
-        #pars = {'algorithm': alg, \
-        #        'input': u0, \
-        #        'regularisation_parameter': 0.04, \
-        #        'number_of_iterations': noi, \
-        #        'tolerance_constant': 0.00001, \
-        #        'methodTV': 0, \
-        #        'nonneg': 0, \
-        #        'printingOut': 0, \
-        #        'time_marching_parameter': 0.00002
-        #        }
         return Im,u0,u_ref
 
 
-- 
cgit v1.2.3


From 7bb99cfd904b23c041be273ffc2746296e6eb814 Mon Sep 17 00:00:00 2001
From: Tomas Kulhanek <tomas.kulhanek@stfc.ac.uk>
Date: Mon, 25 Feb 2019 04:08:05 -0500
Subject: UPDATE: ignore IDE files

---
 .gitignore                            |  1 +
 .idea/CCPi-Regularisation-Toolkit.iml | 16 ----------------
 .idea/encodings.xml                   |  4 ----
 .idea/misc.xml                        |  7 -------
 .idea/modules.xml                     |  8 --------
 .idea/vcs.xml                         |  6 ------
 6 files changed, 1 insertion(+), 41 deletions(-)
 create mode 100644 .gitignore
 delete mode 100644 .idea/CCPi-Regularisation-Toolkit.iml
 delete mode 100644 .idea/encodings.xml
 delete mode 100644 .idea/misc.xml
 delete mode 100644 .idea/modules.xml
 delete mode 100644 .idea/vcs.xml

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..62c8935
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.idea/
\ No newline at end of file
diff --git a/.idea/CCPi-Regularisation-Toolkit.iml b/.idea/CCPi-Regularisation-Toolkit.iml
deleted file mode 100644
index c02bd4f..0000000
--- a/.idea/CCPi-Regularisation-Toolkit.iml
+++ /dev/null
@@ -1,16 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$">
-      <sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
-      <sourceFolder url="file://$MODULE_DIR$/test" isTestSource="false" />
-      <sourceFolder url="file://$MODULE_DIR$/src/Python/src" isTestSource="false" />
-      <sourceFolder url="file://$MODULE_DIR$/src/Python/ccpi" isTestSource="false" />
-    </content>
-    <orderEntry type="jdk" jdkName="Python 3.6 (py3)" jdkType="Python SDK" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-  <component name="TestRunnerService">
-    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
-  </component>
-</module>
\ No newline at end of file
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
deleted file mode 100644
index 15a15b2..0000000
--- a/.idea/encodings.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="Encoding" addBOMForNewFiles="with NO BOM" />
-</project>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index c078c5c..0000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (py3)" project-jdk-type="Python SDK" />
-  <component name="PyCharmProfessionalAdvertiser">
-    <option name="shown" value="true" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
deleted file mode 100644
index e00e88e..0000000
--- a/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/CCPi-Regularisation-Toolkit.iml" filepath="$PROJECT_DIR$/.idea/CCPi-Regularisation-Toolkit.iml" />
-    </modules>
-  </component>
-</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 94a25f7..0000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$" vcs="Git" />
-  </component>
-</project>
\ No newline at end of file
-- 
cgit v1.2.3


From 68e6f3397e8a450854f39a5d514e1f747b9031a4 Mon Sep 17 00:00:00 2001
From: Tomas Kulhanek <tmkulhanek@gmail.com>
Date: Thu, 28 Feb 2019 15:22:10 +0000
Subject: merge

---
 .../demos/SoftwareX_supp/Demo_RealData_Recon_SX.py | 231 ---------------
 .../SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py | 161 -----------
 .../SoftwareX_supp/Demo_SimulData_Recon_SX.py      | 309 ---------------------
 .../demos/SoftwareX_supp/Demo_SimulData_SX.py      | 117 --------
 Wrappers/Python/demos/SoftwareX_supp/Readme.md     |  26 --
 .../optim_param/Optim_admm_rofllt.h5               | Bin 2408 -> 0 bytes
 .../SoftwareX_supp/optim_param/Optim_admm_sbtv.h5  | Bin 2408 -> 0 bytes
 .../SoftwareX_supp/optim_param/Optim_admm_tgv.h5   | Bin 2408 -> 0 bytes
 demos/SoftwareX_supp/Demo_RealData_Recon_SX.py     | 231 +++++++++++++++
 .../SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py | 161 +++++++++++
 demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py    | 309 +++++++++++++++++++++
 demos/SoftwareX_supp/Demo_SimulData_SX.py          | 117 ++++++++
 demos/SoftwareX_supp/Readme.md                     |  26 ++
 .../optim_param/Optim_admm_rofllt.h5               | Bin 0 -> 2408 bytes
 .../SoftwareX_supp/optim_param/Optim_admm_sbtv.h5  | Bin 0 -> 2408 bytes
 demos/SoftwareX_supp/optim_param/Optim_admm_tgv.h5 | Bin 0 -> 2408 bytes
 recipe/meta.yaml                                   |   2 +
 recipe/run_test.py                                 |   2 +
 test/test_CPU_regularisers.py                      |   1 +
 19 files changed, 849 insertions(+), 844 deletions(-)
 delete mode 100644 Wrappers/Python/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py
 delete mode 100644 Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py
 delete mode 100644 Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py
 delete mode 100644 Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_SX.py
 delete mode 100644 Wrappers/Python/demos/SoftwareX_supp/Readme.md
 delete mode 100644 Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_rofllt.h5
 delete mode 100644 Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_sbtv.h5
 delete mode 100644 Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_tgv.h5
 create mode 100644 demos/SoftwareX_supp/Demo_RealData_Recon_SX.py
 create mode 100644 demos/SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py
 create mode 100644 demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py
 create mode 100644 demos/SoftwareX_supp/Demo_SimulData_SX.py
 create mode 100644 demos/SoftwareX_supp/Readme.md
 create mode 100644 demos/SoftwareX_supp/optim_param/Optim_admm_rofllt.h5
 create mode 100644 demos/SoftwareX_supp/optim_param/Optim_admm_sbtv.h5
 create mode 100644 demos/SoftwareX_supp/optim_param/Optim_admm_tgv.h5

diff --git a/Wrappers/Python/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py b/Wrappers/Python/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py
deleted file mode 100644
index 01491d9..0000000
--- a/Wrappers/Python/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py
+++ /dev/null
@@ -1,231 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-This demo scripts support the following publication: 
-"CCPi-Regularisation Toolkit for computed tomographic image reconstruction with 
-proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner,
- Philip J. Withers; Software X, 2019
-____________________________________________________________________________
-* Reads real tomographic data (stored at Zenodo)
---- https://doi.org/10.5281/zenodo.2578893
-* Reconstructs using TomoRec software
-* Saves reconstructed images 
-____________________________________________________________________________
->>>>> Dependencies: <<<<<
-1. ASTRA toolbox: conda install -c astra-toolbox astra-toolbox
-2. TomoRec: conda install -c dkazanc tomorec
-or install from https://github.com/dkazanc/TomoRec
-3. libtiff if one needs to save tiff images:
-    install pip install libtiff
-
-@author: Daniil Kazantsev, e:mail daniil.kazantsev@diamond.ac.uk
-GPLv3 license (ASTRA toolbox)
-"""
-import numpy as np
-import matplotlib.pyplot as plt
-import h5py
-from tomorec.supp.suppTools import normaliser
-import time
-
-# load dendritic projection data
-h5f = h5py.File('data/DendrData_3D.h5','r')
-dataRaw = h5f['dataRaw'][:]
-flats = h5f['flats'][:]
-darks = h5f['darks'][:]
-angles_rad = h5f['angles_rad'][:]
-h5f.close()
-#%%
-# normalise the data [detectorsVert, Projections, detectorsHoriz]
-data_norm = normaliser(dataRaw, flats, darks, log='log')
-del dataRaw, darks, flats
-
-intens_max = 2.3
-plt.figure() 
-plt.subplot(131)
-plt.imshow(data_norm[:,150,:],vmin=0, vmax=intens_max)
-plt.title('2D Projection (analytical)')
-plt.subplot(132)
-plt.imshow(data_norm[300,:,:],vmin=0, vmax=intens_max)
-plt.title('Sinogram view')
-plt.subplot(133)
-plt.imshow(data_norm[:,:,600],vmin=0, vmax=intens_max)
-plt.title('Tangentogram view')
-plt.show()
-
-detectorHoriz = np.size(data_norm,2)
-det_y_crop = [i for i in range(0,detectorHoriz-22)]
-N_size = 950 # reconstruction domain
-time_label = int(time.time())
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("%%%%%%%%%%%%Reconstructing with FBP method %%%%%%%%%%%%%%%%%")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-from tomorec.methodsDIR import RecToolsDIR
-
-RectoolsDIR = RecToolsDIR(DetectorsDimH = np.size(det_y_crop),  # DetectorsDimH # detector dimension (horizontal)
-                    DetectorsDimV = 100,  # DetectorsDimV # detector dimension (vertical) for 3D case only
-                    AnglesVec = angles_rad, # array of angles in radians
-                    ObjSize = N_size, # a scalar to define reconstructed object dimensions
-                    device='gpu')
-
-FBPrec = RectoolsDIR.FBP(data_norm[0:100,:,det_y_crop])
-
-sliceSel = 50
-max_val = 0.003
-plt.figure() 
-plt.subplot(131)
-plt.imshow(FBPrec[sliceSel,:,:],vmin=0, vmax=max_val, cmap="gray")
-plt.title('FBP Reconstruction, axial view')
-
-plt.subplot(132)
-plt.imshow(FBPrec[:,sliceSel,:],vmin=0, vmax=max_val, cmap="gray")
-plt.title('FBP Reconstruction, coronal view')
-
-plt.subplot(133)
-plt.imshow(FBPrec[:,:,sliceSel],vmin=0, vmax=max_val, cmap="gray")
-plt.title('FBP Reconstruction, sagittal view')
-plt.show()
-
-# saving to tiffs (16bit)
-"""
-from libtiff import TIFF
-FBPrec += np.abs(np.min(FBPrec))
-multiplier = (int)(65535/(np.max(FBPrec)))
-
-# saving to tiffs (16bit)
-for i in range(0,np.size(FBPrec,0)):
-    tiff = TIFF.open('Dendr_FBP'+'_'+str(i)+'.tiff', mode='w')
-    tiff.write_image(np.uint16(FBPrec[i,:,:]*multiplier))
-    tiff.close()
-"""
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("Reconstructing with ADMM method using TomoRec software")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-# initialise TomoRec ITERATIVE reconstruction class ONCE
-from tomorec.methodsIR import RecToolsIR
-RectoolsIR = RecToolsIR(DetectorsDimH =  np.size(det_y_crop),  # DetectorsDimH # detector dimension (horizontal)
-                    DetectorsDimV = 100,  # DetectorsDimV # detector dimension (vertical) for 3D case only
-                    AnglesVec = angles_rad, # array of angles in radians
-                    ObjSize = N_size, # a scalar to define reconstructed object dimensions
-                    datafidelity='LS',# data fidelity, choose LS, PWLS (wip), GH (wip), Student (wip)
-                    nonnegativity='ENABLE', # enable nonnegativity constraint (set to 'ENABLE')
-                    OS_number = None, # the number of subsets, NONE/(or > 1) ~ classical / ordered subsets
-                    tolerance = 1e-08, # tolerance to stop outer iterations earlier
-                    device='gpu')
-#%%
-print ("Reconstructing with ADMM method using SB-TV penalty")
-RecADMM_reg_sbtv = RectoolsIR.ADMM(data_norm[0:100,:,det_y_crop],
-                              rho_const = 2000.0, \
-                              iterationsADMM = 15, \
-                              regularisation = 'SB_TV', \
-                              regularisation_parameter = 0.00085,\
-                              regularisation_iterations = 50)
-
-sliceSel = 50
-max_val = 0.003
-plt.figure() 
-plt.subplot(131)
-plt.imshow(RecADMM_reg_sbtv[sliceSel,:,:],vmin=0, vmax=max_val, cmap="gray")
-plt.title('3D ADMM-SB-TV Reconstruction, axial view')
-
-plt.subplot(132)
-plt.imshow(RecADMM_reg_sbtv[:,sliceSel,:],vmin=0, vmax=max_val, cmap="gray")
-plt.title('3D ADMM-SB-TV Reconstruction, coronal view')
-
-plt.subplot(133)
-plt.imshow(RecADMM_reg_sbtv[:,:,sliceSel],vmin=0, vmax=max_val, cmap="gray")
-plt.title('3D ADMM-SB-TV Reconstruction, sagittal view')
-plt.show()
-
-
-# saving to tiffs (16bit)
-"""
-from libtiff import TIFF
-multiplier = (int)(65535/(np.max(RecADMM_reg_sbtv)))
-for i in range(0,np.size(RecADMM_reg_sbtv,0)):
-    tiff = TIFF.open('Dendr_ADMM_SBTV'+'_'+str(i)+'.tiff', mode='w')
-    tiff.write_image(np.uint16(RecADMM_reg_sbtv[i,:,:]*multiplier))
-    tiff.close()
-"""
-# Saving recpnstructed data with a unique time label
-np.save('Dendr_ADMM_SBTV'+str(time_label)+'.npy', RecADMM_reg_sbtv)
-del RecADMM_reg_sbtv
-#%%
-print ("Reconstructing with ADMM method using ROF-LLT penalty")
-RecADMM_reg_rofllt = RectoolsIR.ADMM(data_norm[0:100,:,det_y_crop],
-                              rho_const = 2000.0, \
-                              iterationsADMM = 15, \
-                              regularisation = 'LLT_ROF', \
-                              regularisation_parameter = 0.0009,\
-                              regularisation_parameter2 = 0.0007,\
-                              time_marching_parameter = 0.001,\
-                              regularisation_iterations = 550)
-
-sliceSel = 50
-max_val = 0.003
-plt.figure() 
-plt.subplot(131)
-plt.imshow(RecADMM_reg_rofllt[sliceSel,:,:],vmin=0, vmax=max_val)
-plt.title('3D ADMM-ROFLLT Reconstruction, axial view')
-
-plt.subplot(132)
-plt.imshow(RecADMM_reg_rofllt[:,sliceSel,:],vmin=0, vmax=max_val)
-plt.title('3D ADMM-ROFLLT Reconstruction, coronal view')
-
-plt.subplot(133)
-plt.imshow(RecADMM_reg_rofllt[:,:,sliceSel],vmin=0, vmax=max_val)
-plt.title('3D ADMM-ROFLLT Reconstruction, sagittal view')
-plt.show()
-
-# saving to tiffs (16bit)
-"""
-from libtiff import TIFF
-multiplier = (int)(65535/(np.max(RecADMM_reg_rofllt)))
-for i in range(0,np.size(RecADMM_reg_rofllt,0)):
-    tiff = TIFF.open('Dendr_ADMM_ROFLLT'+'_'+str(i)+'.tiff', mode='w')
-    tiff.write_image(np.uint16(RecADMM_reg_rofllt[i,:,:]*multiplier))
-    tiff.close()
-"""
-
-# Saving recpnstructed data with a unique time label
-np.save('Dendr_ADMM_ROFLLT'+str(time_label)+'.npy', RecADMM_reg_rofllt)
-del RecADMM_reg_rofllt
-#%%
-print ("Reconstructing with ADMM method using TGV penalty")
-RecADMM_reg_tgv = RectoolsIR.ADMM(data_norm[0:100,:,det_y_crop],
-                              rho_const = 2000.0, \
-                              iterationsADMM = 15, \
-                              regularisation = 'TGV', \
-                              regularisation_parameter = 0.01,\
-                              regularisation_iterations = 500)
-
-sliceSel = 50
-max_val = 0.003
-plt.figure() 
-plt.subplot(131)
-plt.imshow(RecADMM_reg_tgv[sliceSel,:,:],vmin=0, vmax=max_val)
-plt.title('3D ADMM-TGV Reconstruction, axial view')
-
-plt.subplot(132)
-plt.imshow(RecADMM_reg_tgv[:,sliceSel,:],vmin=0, vmax=max_val)
-plt.title('3D ADMM-TGV Reconstruction, coronal view')
-
-plt.subplot(133)
-plt.imshow(RecADMM_reg_tgv[:,:,sliceSel],vmin=0, vmax=max_val)
-plt.title('3D ADMM-TGV Reconstruction, sagittal view')
-plt.show()
-
-# saving to tiffs (16bit)
-"""
-from libtiff import TIFF
-multiplier = (int)(65535/(np.max(RecADMM_reg_tgv)))
-for i in range(0,np.size(RecADMM_reg_tgv,0)):
-    tiff = TIFF.open('Dendr_ADMM_TGV'+'_'+str(i)+'.tiff', mode='w')
-    tiff.write_image(np.uint16(RecADMM_reg_tgv[i,:,:]*multiplier))
-    tiff.close()
-"""
-# Saving recpnstructed data with a unique time label
-np.save('Dendr_ADMM_TGV'+str(time_label)+'.npy', RecADMM_reg_tgv)
-del RecADMM_reg_tgv
-#%%
\ No newline at end of file
diff --git a/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py b/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py
deleted file mode 100644
index 59ffc0e..0000000
--- a/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py
+++ /dev/null
@@ -1,161 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-This demo scripts support the following publication: 
-"CCPi-Regularisation Toolkit for computed tomographic image reconstruction with 
-proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner,
- Philip J. Withers; Software X, 2019
-____________________________________________________________________________
-* Reads data which is previosly generated by TomoPhantom software (Zenodo link)
---- https://doi.org/10.5281/zenodo.2578893
-* Optimises for the regularisation parameters which later used in the script:
-Demo_SimulData_Recon_SX.py
-____________________________________________________________________________
->>>>> Dependencies: <<<<<
->>>>> Dependencies: <<<<<
-1. ASTRA toolbox: conda install -c astra-toolbox astra-toolbox
-2. TomoRec: conda install -c dkazanc tomorec
-or install from https://github.com/dkazanc/TomoRec
-
-@author: Daniil Kazantsev, e:mail daniil.kazantsev@diamond.ac.uk
-GPLv3 license (ASTRA toolbox)
-"""
-#import timeit
-import matplotlib.pyplot as plt
-import numpy as np
-import h5py
-from ccpi.supp.qualitymetrics import QualityTools
-
-# loading the data 
-h5f = h5py.File('data/TomoSim_data1550671417.h5','r')
-phantom = h5f['phantom'][:]
-projdata_norm = h5f['projdata_norm'][:]
-proj_angles = h5f['proj_angles'][:]
-h5f.close()
-
-[Vert_det, AnglesNum, Horiz_det] = np.shape(projdata_norm)
-N_size = Vert_det
-
-sliceSel = 128
-#plt.gray()
-plt.figure() 
-plt.subplot(131)
-plt.imshow(phantom[sliceSel,:,:],vmin=0, vmax=1)
-plt.title('3D Phantom, axial view')
-
-plt.subplot(132)
-plt.imshow(phantom[:,sliceSel,:],vmin=0, vmax=1)
-plt.title('3D Phantom, coronal view')
-
-plt.subplot(133)
-plt.imshow(phantom[:,:,sliceSel],vmin=0, vmax=1)
-plt.title('3D Phantom, sagittal view')
-plt.show()
-
-intens_max = 240
-plt.figure() 
-plt.subplot(131)
-plt.imshow(projdata_norm[:,sliceSel,:],vmin=0, vmax=intens_max)
-plt.title('2D Projection (erroneous)')
-plt.subplot(132)
-plt.imshow(projdata_norm[sliceSel,:,:],vmin=0, vmax=intens_max)
-plt.title('Sinogram view')
-plt.subplot(133)
-plt.imshow(projdata_norm[:,:,sliceSel],vmin=0, vmax=intens_max)
-plt.title('Tangentogram view')
-plt.show()
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("Reconstructing with ADMM method using TomoRec software")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-# initialise TomoRec ITERATIVE reconstruction class ONCE
-from tomorec.methodsIR import RecToolsIR
-RectoolsIR = RecToolsIR(DetectorsDimH = Horiz_det,  # DetectorsDimH # detector dimension (horizontal)
-                    DetectorsDimV = Vert_det,  # DetectorsDimV # detector dimension (vertical) for 3D case only
-                    AnglesVec = proj_angles, # array of angles in radians
-                    ObjSize = N_size, # a scalar to define reconstructed object dimensions
-                    datafidelity='LS',# data fidelity, choose LS, PWLS (wip), GH (wip), Student (wip)
-                    nonnegativity='ENABLE', # enable nonnegativity constraint (set to 'ENABLE')
-                    OS_number = None, # the number of subsets, NONE/(or > 1) ~ classical / ordered subsets
-                    tolerance = 1e-08, # tolerance to stop outer iterations earlier
-                    device='gpu')
-#%%
-param_space = 30
-reg_param_sb_vec = np.linspace(0.03,0.15,param_space,dtype='float32') # a vector of parameters
-erros_vec_sbtv = np.zeros((param_space)) # a vector of errors
-
-print ("Reconstructing with ADMM method using SB-TV penalty")
-for i in range(0,param_space):
-    RecADMM_reg_sbtv = RectoolsIR.ADMM(projdata_norm,
-                                  rho_const = 2000.0, \
-                                  iterationsADMM = 15, \
-                                  regularisation = 'SB_TV', \
-                                  regularisation_parameter = reg_param_sb_vec[i],\
-                                  regularisation_iterations = 50)
-    # calculate errors 
-    Qtools = QualityTools(phantom, RecADMM_reg_sbtv)
-    erros_vec_sbtv[i] = Qtools.rmse()
-    print("RMSE for regularisation parameter {} for ADMM-SB-TV is {}".format(reg_param_sb_vec[i],erros_vec_sbtv[i]))
-
-plt.figure() 
-plt.plot(erros_vec_sbtv)
-
-# Saving generated data with a unique time label
-h5f = h5py.File('Optim_admm_sbtv.h5', 'w')
-h5f.create_dataset('reg_param_sb_vec', data=reg_param_sb_vec)
-h5f.create_dataset('erros_vec_sbtv', data=erros_vec_sbtv)
-h5f.close()
-#%%
-param_space = 30
-reg_param_rofllt_vec = np.linspace(0.03,0.15,param_space,dtype='float32') # a vector of parameters
-erros_vec_rofllt = np.zeros((param_space)) # a vector of errors
-
-print ("Reconstructing with ADMM method using ROF-LLT penalty")
-for i in range(0,param_space):
-    RecADMM_reg_rofllt = RectoolsIR.ADMM(projdata_norm,
-                                  rho_const = 2000.0, \
-                                  iterationsADMM = 15, \
-                                  regularisation = 'LLT_ROF', \
-                                  regularisation_parameter = reg_param_rofllt_vec[i],\
-                                  regularisation_parameter2 = 0.005,\
-                                  regularisation_iterations = 600)
-    # calculate errors 
-    Qtools = QualityTools(phantom, RecADMM_reg_rofllt)
-    erros_vec_rofllt[i] = Qtools.rmse()
-    print("RMSE for regularisation parameter {} for ADMM-ROF-LLT is {}".format(reg_param_rofllt_vec[i],erros_vec_rofllt[i]))
-
-plt.figure() 
-plt.plot(erros_vec_rofllt)
-
-# Saving generated data with a unique time label
-h5f = h5py.File('Optim_admm_rofllt.h5', 'w')
-h5f.create_dataset('reg_param_rofllt_vec', data=reg_param_rofllt_vec)
-h5f.create_dataset('erros_vec_rofllt', data=erros_vec_rofllt)
-h5f.close()
-#%%
-param_space = 30
-reg_param_tgv_vec = np.linspace(0.03,0.15,param_space,dtype='float32') # a vector of parameters
-erros_vec_tgv = np.zeros((param_space)) # a vector of errors
-
-print ("Reconstructing with ADMM method using TGV penalty")
-for i in range(0,param_space):
-    RecADMM_reg_tgv = RectoolsIR.ADMM(projdata_norm,
-                                  rho_const = 2000.0, \
-                                  iterationsADMM = 15, \
-                                  regularisation = 'TGV', \
-                                  regularisation_parameter = reg_param_tgv_vec[i],\
-                                  regularisation_iterations = 600)
-    # calculate errors 
-    Qtools = QualityTools(phantom, RecADMM_reg_tgv)
-    erros_vec_tgv[i] = Qtools.rmse()
-    print("RMSE for regularisation parameter {} for ADMM-TGV is {}".format(reg_param_tgv_vec[i],erros_vec_tgv[i]))
-
-plt.figure() 
-plt.plot(erros_vec_tgv)
-
-# Saving generated data with a unique time label
-h5f = h5py.File('Optim_admm_tgv.h5', 'w')
-h5f.create_dataset('reg_param_tgv_vec', data=reg_param_tgv_vec)
-h5f.create_dataset('erros_vec_tgv', data=erros_vec_tgv)
-h5f.close()
-#%%
\ No newline at end of file
diff --git a/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py b/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py
deleted file mode 100644
index 93b0cef..0000000
--- a/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py
+++ /dev/null
@@ -1,309 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-This demo scripts support the following publication: 
-"CCPi-Regularisation Toolkit for computed tomographic image reconstruction with 
-proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner,
- Philip J. Withers; Software X, 2019
-____________________________________________________________________________
-* Reads data which is previously generated by TomoPhantom software (Zenodo link)
---- https://doi.org/10.5281/zenodo.2578893
-* Reconstruct using optimised regularisation parameters (see Demo_SimulData_ParOptimis_SX.py)
-____________________________________________________________________________
->>>>> Dependencies: <<<<<
-1. ASTRA toolbox: conda install -c astra-toolbox astra-toolbox
-2. TomoRec: conda install -c dkazanc tomorec
-or install from https://github.com/dkazanc/TomoRec
-
-@author: Daniil Kazantsev, e:mail daniil.kazantsev@diamond.ac.uk
-GPLv3 license (ASTRA toolbox)
-"""
-#import timeit
-import matplotlib.pyplot as plt
-import matplotlib.gridspec as gridspec
-import numpy as np
-import h5py
-from ccpi.supp.qualitymetrics import QualityTools
-from scipy.signal import gaussian
-
-# loading the data 
-h5f = h5py.File('data/TomoSim_data1550671417.h5','r')
-phantom = h5f['phantom'][:]
-projdata_norm = h5f['projdata_norm'][:]
-proj_angles = h5f['proj_angles'][:]
-h5f.close()
-
-[Vert_det, AnglesNum, Horiz_det] = np.shape(projdata_norm)
-N_size = Vert_det
-
-# loading optmisation parameters (the result of running Demo_SimulData_ParOptimis_SX)
-h5f = h5py.File('optim_param/Optim_admm_sbtv.h5','r')
-reg_param_sb_vec = h5f['reg_param_sb_vec'][:]
-erros_vec_sbtv = h5f['erros_vec_sbtv'][:]
-h5f.close()
-
-h5f = h5py.File('optim_param/Optim_admm_rofllt.h5','r')
-reg_param_rofllt_vec = h5f['reg_param_rofllt_vec'][:]
-erros_vec_rofllt = h5f['erros_vec_rofllt'][:]
-h5f.close()
-
-h5f = h5py.File('optim_param/Optim_admm_tgv.h5','r')
-reg_param_tgv_vec = h5f['reg_param_tgv_vec'][:]
-erros_vec_tgv = h5f['erros_vec_tgv'][:]
-h5f.close()
-
-index_minSBTV = min(xrange(len(erros_vec_sbtv)), key=erros_vec_sbtv.__getitem__)
-index_minROFLLT = min(xrange(len(erros_vec_rofllt)), key=erros_vec_rofllt.__getitem__)
-index_minTGV = min(xrange(len(erros_vec_tgv)), key=erros_vec_tgv.__getitem__)
-# assign optimal regularisation parameters:
-optimReg_sbtv = reg_param_sb_vec[index_minSBTV]
-optimReg_rofllt = reg_param_rofllt_vec[index_minROFLLT]
-optimReg_tgv = reg_param_tgv_vec[index_minTGV]
-#%%
-# plot loaded data
-sliceSel = 128
-#plt.figure() 
-fig, (ax1, ax2) = plt.subplots(figsize=(15, 5), ncols=2)
-plt.rcParams.update({'xtick.labelsize': 'x-small'})
-plt.rcParams.update({'ytick.labelsize':'x-small'})
-plt.subplot(121)
-one = plt.imshow(phantom[sliceSel,:,:],vmin=0, vmax=1, interpolation='none', cmap="PuOr")
-fig.colorbar(one, ax=ax1)
-plt.title('3D Phantom, axial (X-Y) view')
-plt.subplot(122)
-two = plt.imshow(phantom[:,sliceSel,:],vmin=0, vmax=1,interpolation='none', cmap="PuOr")
-fig.colorbar(two, ax=ax2)
-plt.title('3D Phantom, coronal (Y-Z) view')
-"""
-plt.subplot(133)
-plt.imshow(phantom[:,:,sliceSel],vmin=0, vmax=1, cmap="PuOr")
-plt.title('3D Phantom, sagittal view')
-
-"""
-plt.show()
-#%%
-intens_max = 220
-plt.figure() 
-plt.rcParams.update({'xtick.labelsize': 'x-small'})
-plt.rcParams.update({'ytick.labelsize':'x-small'})
-plt.subplot(131)
-plt.imshow(projdata_norm[:,sliceSel,:],vmin=0, vmax=intens_max, cmap="PuOr")
-plt.xlabel('X-detector', fontsize=16)
-plt.ylabel('Z-detector', fontsize=16)
-plt.title('2D Projection (X-Z) view', fontsize=19)
-plt.subplot(132)
-plt.imshow(projdata_norm[sliceSel,:,:],vmin=0, vmax=intens_max, cmap="PuOr")
-plt.xlabel('X-detector', fontsize=16)
-plt.ylabel('Projection angle', fontsize=16)
-plt.title('Sinogram (X-Y) view', fontsize=19)
-plt.subplot(133)
-plt.imshow(projdata_norm[:,:,sliceSel],vmin=0, vmax=intens_max, cmap="PuOr")
-plt.xlabel('Projection angle', fontsize=16)
-plt.ylabel('Z-detector', fontsize=16)
-plt.title('Vertical (Y-Z) view', fontsize=19)
-plt.show()
-#plt.savefig('projdata.pdf', format='pdf', dpi=1200)
-#%%
-# initialise TomoRec DIRECT reconstruction class ONCE
-from tomorec.methodsDIR import RecToolsDIR
-RectoolsDIR = RecToolsDIR(DetectorsDimH = Horiz_det,  # DetectorsDimH # detector dimension (horizontal)
-                    DetectorsDimV = Vert_det,  # DetectorsDimV # detector dimension (vertical) for 3D case only
-                    AnglesVec = proj_angles, # array of angles in radians
-                    ObjSize = N_size, # a scalar to define reconstructed object dimensions
-                    device = 'gpu')
-#%%
-print ("Reconstruction using FBP from TomoRec")
-recFBP= RectoolsDIR.FBP(projdata_norm) # FBP reconstruction
-#%%
-x0, y0 = 0, 127 # These are in _pixel_ coordinates!!
-x1, y1 = 255, 127
-
-sliceSel = int(0.5*N_size)
-max_val = 1
-plt.figure(figsize = (20,5))
-gs1 = gridspec.GridSpec(1, 3)
-gs1.update(wspace=0.1, hspace=0.05) # set the spacing between axes. 
-ax1 = plt.subplot(gs1[0])
-plt.imshow(recFBP[sliceSel,:,:],vmin=0, vmax=max_val, cmap="PuOr")
-ax1.plot([x0, x1], [y0, y1], 'ko-', linestyle='--')
-plt.colorbar(ax=ax1)
-plt.title('FBP Reconstruction, axial (X-Y) view', fontsize=19)
-ax1.set_aspect('equal')
-ax3 = plt.subplot(gs1[1])
-plt.plot(phantom[sliceSel,sliceSel,0:N_size],color='k',linewidth=2)
-plt.plot(recFBP[sliceSel,sliceSel,0:N_size],linestyle='--',color='g')
-plt.title('Profile', fontsize=19)
-ax2 = plt.subplot(gs1[2])
-plt.imshow(recFBP[:,sliceSel,:],vmin=0, vmax=max_val, cmap="PuOr")
-plt.title('FBP Reconstruction, coronal (Y-Z) view', fontsize=19)
-ax2.set_aspect('equal')
-plt.show()
-#plt.savefig('FBP_phantom.pdf', format='pdf', dpi=1600)
-
-# calculate errors 
-Qtools = QualityTools(phantom, recFBP)
-RMSE_fbp = Qtools.rmse()
-print("Root Mean Square Error for FBP is {}".format(RMSE_fbp))
-
-# SSIM measure
-Qtools = QualityTools(phantom[128,:,:]*255, recFBP[128,:,:]*235)
-win = np.array([gaussian(11, 1.5)])
-win2d = win * (win.T)
-ssim_fbp = Qtools.ssim(win2d)
-print("Mean SSIM for FBP is {}".format(ssim_fbp[0]))
-#%%
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-print ("Reconstructing with ADMM method using TomoRec software")
-print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
-# initialise TomoRec ITERATIVE reconstruction class ONCE
-from tomorec.methodsIR import RecToolsIR
-RectoolsIR = RecToolsIR(DetectorsDimH = Horiz_det,  # DetectorsDimH # detector dimension (horizontal)
-                    DetectorsDimV = Vert_det,  # DetectorsDimV # detector dimension (vertical) for 3D case only
-                    AnglesVec = proj_angles, # array of angles in radians
-                    ObjSize = N_size, # a scalar to define reconstructed object dimensions
-                    datafidelity='LS',# data fidelity, choose LS, PWLS (wip), GH (wip), Student (wip)
-                    nonnegativity='ENABLE', # enable nonnegativity constraint (set to 'ENABLE')
-                    OS_number = None, # the number of subsets, NONE/(or > 1) ~ classical / ordered subsets
-                    tolerance = 1e-08, # tolerance to stop outer iterations earlier
-                    device='gpu')
-#%%
-print ("Reconstructing with ADMM method using SB-TV penalty")
-RecADMM_reg_sbtv = RectoolsIR.ADMM(projdata_norm,
-                                  rho_const = 2000.0, \
-                                  iterationsADMM = 25, \
-                                  regularisation = 'SB_TV', \
-                                  regularisation_parameter = optimReg_sbtv,\
-                                  regularisation_iterations = 50)
-
-sliceSel = int(0.5*N_size)
-max_val = 1
-plt.figure(figsize = (20,3))
-gs1 = gridspec.GridSpec(1, 4)
-gs1.update(wspace=0.02, hspace=0.01) # set the spacing between axes. 
-ax1 = plt.subplot(gs1[0])
-plt.plot(reg_param_sb_vec, erros_vec_sbtv, color='k',linewidth=2)
-plt.xlabel('Regularisation parameter', fontsize=16)
-plt.ylabel('RMSE value', fontsize=16)
-plt.title('Regularisation selection', fontsize=19)
-ax2 = plt.subplot(gs1[1])
-plt.imshow(RecADMM_reg_sbtv[sliceSel,:,:],vmin=0, vmax=max_val, cmap="PuOr")
-ax2.plot([x0, x1], [y0, y1], 'ko-', linestyle='--')
-plt.title('ADMM-SBTV (X-Y) view', fontsize=19)
-#ax2.set_aspect('equal')
-ax3 = plt.subplot(gs1[2])
-plt.plot(phantom[sliceSel,sliceSel,0:N_size],color='k',linewidth=2)
-plt.plot(RecADMM_reg_sbtv[sliceSel,sliceSel,0:N_size],linestyle='--',color='g')
-plt.title('Profile', fontsize=19)
-ax4 = plt.subplot(gs1[3])
-plt.imshow(RecADMM_reg_sbtv[:,sliceSel,:],vmin=0, vmax=max_val, cmap="PuOr")
-plt.title('ADMM-SBTV (Y-Z) view', fontsize=19)
-plt.colorbar(ax=ax4)
-plt.show()
-plt.savefig('SBTV_phantom.pdf', format='pdf', dpi=1600)
-
-# calculate errors 
-Qtools = QualityTools(phantom, RecADMM_reg_sbtv)
-RMSE_admm_sbtv = Qtools.rmse()
-print("Root Mean Square Error for ADMM-SB-TV is {}".format(RMSE_admm_sbtv))
-
-# SSIM measure
-Qtools = QualityTools(phantom[128,:,:]*255, RecADMM_reg_sbtv[128,:,:]*235)
-win = np.array([gaussian(11, 1.5)])
-win2d = win * (win.T)
-ssim_admm_sbtv = Qtools.ssim(win2d)
-print("Mean SSIM ADMM-SBTV is {}".format(ssim_admm_sbtv[0]))
-#%%
-print ("Reconstructing with ADMM method using ROFLLT penalty")
-RecADMM_reg_rofllt = RectoolsIR.ADMM(projdata_norm,
-                                  rho_const = 2000.0, \
-                                  iterationsADMM = 25, \
-                                  regularisation = 'LLT_ROF', \
-                                  regularisation_parameter = optimReg_rofllt,\
-                                  regularisation_parameter2 = 0.0085,\
-                                  regularisation_iterations = 600)
-
-sliceSel = int(0.5*N_size)
-max_val = 1
-plt.figure(figsize = (20,3))
-gs1 = gridspec.GridSpec(1, 4)
-gs1.update(wspace=0.02, hspace=0.01) # set the spacing between axes. 
-ax1 = plt.subplot(gs1[0])
-plt.plot(reg_param_rofllt_vec, erros_vec_rofllt, color='k',linewidth=2)
-plt.xlabel('Regularisation parameter', fontsize=16)
-plt.ylabel('RMSE value', fontsize=16)
-plt.title('Regularisation selection', fontsize=19)
-ax2 = plt.subplot(gs1[1])
-plt.imshow(RecADMM_reg_rofllt[sliceSel,:,:],vmin=0, vmax=max_val, cmap="PuOr")
-ax2.plot([x0, x1], [y0, y1], 'ko-', linestyle='--')
-plt.title('ADMM-ROFLLT (X-Y) view', fontsize=19)
-#ax2.set_aspect('equal')
-ax3 = plt.subplot(gs1[2])
-plt.plot(phantom[sliceSel,sliceSel,0:N_size],color='k',linewidth=2)
-plt.plot(RecADMM_reg_rofllt[sliceSel,sliceSel,0:N_size],linestyle='--',color='g')
-plt.title('Profile', fontsize=19)
-ax4 = plt.subplot(gs1[3])
-plt.imshow(RecADMM_reg_rofllt[:,sliceSel,:],vmin=0, vmax=max_val, cmap="PuOr")
-plt.title('ADMM-ROFLLT (Y-Z) view', fontsize=19)
-plt.colorbar(ax=ax4)
-plt.show()
-#plt.savefig('ROFLLT_phantom.pdf', format='pdf', dpi=1600)
-
-# calculate errors 
-Qtools = QualityTools(phantom, RecADMM_reg_rofllt)
-RMSE_admm_rofllt = Qtools.rmse()
-print("Root Mean Square Error for ADMM-ROF-LLT is {}".format(RMSE_admm_rofllt))
-
-# SSIM measure
-Qtools = QualityTools(phantom[128,:,:]*255, RecADMM_reg_rofllt[128,:,:]*235)
-win = np.array([gaussian(11, 1.5)])
-win2d = win * (win.T)
-ssim_admm_rifllt = Qtools.ssim(win2d)
-print("Mean SSIM ADMM-ROFLLT is {}".format(ssim_admm_rifllt[0]))
-#%%
-print ("Reconstructing with ADMM method using TGV penalty")
-RecADMM_reg_tgv = RectoolsIR.ADMM(projdata_norm,
-                                  rho_const = 2000.0, \
-                                  iterationsADMM = 25, \
-                                  regularisation = 'TGV', \
-                                  regularisation_parameter = optimReg_tgv,\
-                                  regularisation_iterations = 600)
-#%%
-sliceSel = int(0.5*N_size)
-max_val = 1
-plt.figure(figsize = (20,3))
-gs1 = gridspec.GridSpec(1, 4)
-gs1.update(wspace=0.02, hspace=0.01) # set the spacing between axes. 
-ax1 = plt.subplot(gs1[0])
-plt.plot(reg_param_tgv_vec, erros_vec_tgv, color='k',linewidth=2)
-plt.xlabel('Regularisation parameter', fontsize=16)
-plt.ylabel('RMSE value', fontsize=16)
-plt.title('Regularisation selection', fontsize=19)
-ax2 = plt.subplot(gs1[1])
-plt.imshow(RecADMM_reg_tgv[sliceSel,:,:],vmin=0, vmax=max_val, cmap="PuOr")
-ax2.plot([x0, x1], [y0, y1], 'ko-', linestyle='--')
-plt.title('ADMM-TGV (X-Y) view', fontsize=19)
-#ax2.set_aspect('equal')
-ax3 = plt.subplot(gs1[2])
-plt.plot(phantom[sliceSel,sliceSel,0:N_size],color='k',linewidth=2)
-plt.plot(RecADMM_reg_tgv[sliceSel,sliceSel,0:N_size],linestyle='--',color='g')
-plt.title('Profile', fontsize=19)
-ax4 = plt.subplot(gs1[3])
-plt.imshow(RecADMM_reg_tgv[:,sliceSel,:],vmin=0, vmax=max_val, cmap="PuOr")
-plt.title('ADMM-TGV (Y-Z) view', fontsize=19)
-plt.colorbar(ax=ax4)
-plt.show()
-#plt.savefig('TGV_phantom.pdf', format='pdf', dpi=1600)
-
-# calculate errors 
-Qtools = QualityTools(phantom, RecADMM_reg_tgv)
-RMSE_admm_tgv = Qtools.rmse()
-print("Root Mean Square Error for ADMM-TGV is {}".format(RMSE_admm_tgv))
-
-# SSIM measure
-#Create a 2d gaussian for the window parameter
-Qtools = QualityTools(phantom[128,:,:]*255, RecADMM_reg_tgv[128,:,:]*235)
-win = np.array([gaussian(11, 1.5)])
-win2d = win * (win.T)
-ssim_admm_tgv = Qtools.ssim(win2d)
-print("Mean SSIM ADMM-TGV is {}".format(ssim_admm_tgv[0]))
-#%%
\ No newline at end of file
diff --git a/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_SX.py b/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_SX.py
deleted file mode 100644
index cdf4325..0000000
--- a/Wrappers/Python/demos/SoftwareX_supp/Demo_SimulData_SX.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-This demo scripts support the following publication: 
-"CCPi-Regularisation Toolkit for computed tomographic image reconstruction with 
-proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner,
- Philip J. Withers; Software X, 2019
-____________________________________________________________________________
-* Runs TomoPhantom software to simulate tomographic projection data with
-some imaging errors and noise
-* Saves the data into hdf file to be uploaded in reconstruction scripts
-__________________________________________________________________________
-
->>>>> Dependencies: <<<<<
-1. TomoPhantom software for phantom and data generation
-
-@author: Daniil Kazantsev, e:mail daniil.kazantsev@diamond.ac.uk
-Apache 2.0 license
-"""
-import timeit
-import os
-import matplotlib.pyplot as plt
-import numpy as np
-import tomophantom
-from tomophantom import TomoP3D
-from tomophantom.supp.flatsgen import flats
-from tomophantom.supp.normraw import normaliser_sim
-
-print ("Building 3D phantom using TomoPhantom software")
-tic=timeit.default_timer()
-model = 16 # select a model number from the library
-N_size = 256 # Define phantom dimensions using a scalar value (cubic phantom)
-path = os.path.dirname(tomophantom.__file__)
-path_library3D = os.path.join(path, "Phantom3DLibrary.dat")
-#This will generate a N_size x N_size x N_size phantom (3D)
-phantom_tm = TomoP3D.Model(model, N_size, path_library3D)
-toc=timeit.default_timer()
-Run_time = toc - tic
-print("Phantom has been built in {} seconds".format(Run_time))
-
-sliceSel = int(0.5*N_size)
-#plt.gray()
-plt.figure() 
-plt.subplot(131)
-plt.imshow(phantom_tm[sliceSel,:,:],vmin=0, vmax=1)
-plt.title('3D Phantom, axial view')
-
-plt.subplot(132)
-plt.imshow(phantom_tm[:,sliceSel,:],vmin=0, vmax=1)
-plt.title('3D Phantom, coronal view')
-
-plt.subplot(133)
-plt.imshow(phantom_tm[:,:,sliceSel],vmin=0, vmax=1)
-plt.title('3D Phantom, sagittal view')
-plt.show()
-
-# Projection geometry related parameters:
-Horiz_det = int(np.sqrt(2)*N_size) # detector column count (horizontal)
-Vert_det = N_size # detector row count (vertical) (no reason for it to be > N)
-angles_num = int(0.35*np.pi*N_size); # angles number
-angles = np.linspace(0.0,179.9,angles_num,dtype='float32') # in degrees
-angles_rad = angles*(np.pi/180.0)
-#%%
-print ("Building 3D analytical projection data with TomoPhantom")
-projData3D_analyt= TomoP3D.ModelSino(model, N_size, Horiz_det, Vert_det, angles, path_library3D)
-
-intens_max = N_size
-sliceSel = int(0.5*N_size)
-plt.figure() 
-plt.subplot(131)
-plt.imshow(projData3D_analyt[:,sliceSel,:],vmin=0, vmax=intens_max)
-plt.title('2D Projection (analytical)')
-plt.subplot(132)
-plt.imshow(projData3D_analyt[sliceSel,:,:],vmin=0, vmax=intens_max)
-plt.title('Sinogram view')
-plt.subplot(133)
-plt.imshow(projData3D_analyt[:,:,sliceSel],vmin=0, vmax=intens_max)
-plt.title('Tangentogram view')
-plt.show()
-#%%
-print ("Simulate flat fields, add noise and normalise projections...")
-flatsnum = 20 # generate 20 flat fields
-flatsSIM = flats(Vert_det, Horiz_det, maxheight = 0.1, maxthickness = 3, sigma_noise = 0.2, sigmasmooth = 3, flatsnum=flatsnum)
-
-plt.figure() 
-plt.imshow(flatsSIM[0,:,:],vmin=0, vmax=1)
-plt.title('A selected simulated flat-field')
-#%%
-# Apply normalisation of data and add noise
-flux_intensity = 60000 # controls the level of noise 
-sigma_flats = 0.01 # contro the level of noise in flats (higher creates more ring artifacts)
-projData3D_norm = normaliser_sim(projData3D_analyt, flatsSIM, sigma_flats, flux_intensity)
-
-intens_max = N_size
-sliceSel = int(0.5*N_size)
-plt.figure() 
-plt.subplot(131)
-plt.imshow(projData3D_norm[:,sliceSel,:],vmin=0, vmax=intens_max)
-plt.title('2D Projection (erroneous)')
-plt.subplot(132)
-plt.imshow(projData3D_norm[sliceSel,:,:],vmin=0, vmax=intens_max)
-plt.title('Sinogram view')
-plt.subplot(133)
-plt.imshow(projData3D_norm[:,:,sliceSel],vmin=0, vmax=intens_max)
-plt.title('Tangentogram view')
-plt.show()
-#%%
-import h5py
-import time
-time_label = int(time.time())
-# Saving generated data with a unique time label
-h5f = h5py.File('TomoSim_data'+str(time_label)+'.h5', 'w')
-h5f.create_dataset('phantom', data=phantom_tm)
-h5f.create_dataset('projdata_norm', data=projData3D_norm)
-h5f.create_dataset('proj_angles', data=angles_rad)
-h5f.close()
-#%%
\ No newline at end of file
diff --git a/Wrappers/Python/demos/SoftwareX_supp/Readme.md b/Wrappers/Python/demos/SoftwareX_supp/Readme.md
deleted file mode 100644
index 54e83f1..0000000
--- a/Wrappers/Python/demos/SoftwareX_supp/Readme.md
+++ /dev/null
@@ -1,26 +0,0 @@
-
-# SoftwareX publication [1] supporting files
-
-## Decription:
-The scripts here support publication in SoftwareX journal [1] to ensure reproducibility of the research. The scripts linked with data shared at Zenodo. 
-
-## Data:
-Data is shared at Zenodo [here](https://doi.org/10.5281/zenodo.2578893)
-
-## Dependencies:
-1. [ASTRA toolbox](https://github.com/astra-toolbox/astra-toolbox): `conda install -c astra-toolbox astra-toolbox`
-2. [TomoRec](https://github.com/dkazanc/TomoRec): `conda install -c dkazanc tomorec`
-3. [Tomophantom](https://github.com/dkazanc/TomoPhantom): `conda install tomophantom -c ccpi`
-
-## Files description: 
-- `Demo_SimulData_SX.py` - simulates 3D projection data using [Tomophantom](https://github.com/dkazanc/TomoPhantom) software. One can skip this module if the data is taken from [Zenodo](https://doi.org/10.5281/zenodo.2578893)
-- `Demo_SimulData_ParOptimis_SX.py` - runs computationally extensive calculations for optimal regularisation parameters, the result are saved into directory `optim_param`. This script can be also skipped. 
-- `Demo_SimulData_Recon_SX.py` - using established regularisation parameters, one runs iterative reconstruction
-- `Demo_RealData_Recon_SX.py` - runs real data reconstructions. Can be quite intense on memory so reduce the size of the reconstructed volume if needed. 
-
-### References:
-[1] "CCPi-Regularisation Toolkit for computed tomographic image reconstruction with proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner and Philip J. Withers; SoftwareX, 2019. 
-
-### Acknowledgments:
-CCPi-RGL software is a product of the [CCPi](https://www.ccpi.ac.uk/) group, STFC SCD software developers and Diamond Light Source (DLS). Any relevant questions/comments can be e-mailed to Daniil Kazantsev at dkazanc@hotmail.com
-
diff --git a/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_rofllt.h5 b/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_rofllt.h5
deleted file mode 100644
index 63bc4fd..0000000
Binary files a/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_rofllt.h5 and /dev/null differ
diff --git a/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_sbtv.h5 b/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_sbtv.h5
deleted file mode 100644
index 03c0c14..0000000
Binary files a/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_sbtv.h5 and /dev/null differ
diff --git a/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_tgv.h5 b/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_tgv.h5
deleted file mode 100644
index 056d915..0000000
Binary files a/Wrappers/Python/demos/SoftwareX_supp/optim_param/Optim_admm_tgv.h5 and /dev/null differ
diff --git a/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py b/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py
new file mode 100644
index 0000000..01491d9
--- /dev/null
+++ b/demos/SoftwareX_supp/Demo_RealData_Recon_SX.py
@@ -0,0 +1,231 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+These demo scripts support the following publication:
+"CCPi-Regularisation Toolkit for computed tomographic image reconstruction with 
+proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner,
+ Philip J. Withers; Software X, 2019
+____________________________________________________________________________
+* Reads real tomographic data (stored at Zenodo)
+--- https://doi.org/10.5281/zenodo.2578893
+* Reconstructs using TomoRec software
+* Saves reconstructed images 
+____________________________________________________________________________
+>>>>> Dependencies: <<<<<
+1. ASTRA toolbox: conda install -c astra-toolbox astra-toolbox
+2. TomoRec: conda install -c dkazanc tomorec
+or install from https://github.com/dkazanc/TomoRec
+3. libtiff, only if one needs to save tiff images:
+    pip install libtiff
+
+@author: Daniil Kazantsev, e-mail: daniil.kazantsev@diamond.ac.uk
+GPLv3 license (ASTRA toolbox)
+"""
+import numpy as np
+import matplotlib.pyplot as plt
+import h5py
+from tomorec.supp.suppTools import normaliser
+import time
+
+# load dendritic projection data
+h5f = h5py.File('data/DendrData_3D.h5','r')
+dataRaw = h5f['dataRaw'][:]
+flats = h5f['flats'][:]
+darks = h5f['darks'][:]
+angles_rad = h5f['angles_rad'][:]
+h5f.close()
+#%%
+# normalise the data [detectorsVert, Projections, detectorsHoriz]
+data_norm = normaliser(dataRaw, flats, darks, log='log')
+del dataRaw, darks, flats
+
+intens_max = 2.3
+plt.figure() 
+plt.subplot(131)
+plt.imshow(data_norm[:,150,:],vmin=0, vmax=intens_max)
+plt.title('2D Projection (analytical)')
+plt.subplot(132)
+plt.imshow(data_norm[300,:,:],vmin=0, vmax=intens_max)
+plt.title('Sinogram view')
+plt.subplot(133)
+plt.imshow(data_norm[:,:,600],vmin=0, vmax=intens_max)
+plt.title('Tangentogram view')
+plt.show()
+
+detectorHoriz = np.size(data_norm,2)
+det_y_crop = [i for i in range(0,detectorHoriz-22)]
+N_size = 950 # reconstruction domain
+time_label = int(time.time())
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("%%%%%%%%%%%%Reconstructing with FBP method %%%%%%%%%%%%%%%%%")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+from tomorec.methodsDIR import RecToolsDIR
+
+RectoolsDIR = RecToolsDIR(DetectorsDimH = np.size(det_y_crop),  # DetectorsDimH # detector dimension (horizontal)
+                    DetectorsDimV = 100,  # DetectorsDimV # detector dimension (vertical) for 3D case only
+                    AnglesVec = angles_rad, # array of angles in radians
+                    ObjSize = N_size, # a scalar to define reconstructed object dimensions
+                    device='gpu')
+
+FBPrec = RectoolsDIR.FBP(data_norm[0:100,:,det_y_crop])
+
+sliceSel = 50
+max_val = 0.003
+plt.figure() 
+plt.subplot(131)
+plt.imshow(FBPrec[sliceSel,:,:],vmin=0, vmax=max_val, cmap="gray")
+plt.title('FBP Reconstruction, axial view')
+
+plt.subplot(132)
+plt.imshow(FBPrec[:,sliceSel,:],vmin=0, vmax=max_val, cmap="gray")
+plt.title('FBP Reconstruction, coronal view')
+
+plt.subplot(133)
+plt.imshow(FBPrec[:,:,sliceSel],vmin=0, vmax=max_val, cmap="gray")
+plt.title('FBP Reconstruction, sagittal view')
+plt.show()
+
+# saving to tiffs (16bit)
+"""
+from libtiff import TIFF
+FBPrec += np.abs(np.min(FBPrec))
+multiplier = (int)(65535/(np.max(FBPrec)))
+
+# saving to tiffs (16bit)
+for i in range(0,np.size(FBPrec,0)):
+    tiff = TIFF.open('Dendr_FBP'+'_'+str(i)+'.tiff', mode='w')
+    tiff.write_image(np.uint16(FBPrec[i,:,:]*multiplier))
+    tiff.close()
+"""
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("Reconstructing with ADMM method using TomoRec software")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+# initialise TomoRec ITERATIVE reconstruction class ONCE
+from tomorec.methodsIR import RecToolsIR
+RectoolsIR = RecToolsIR(DetectorsDimH =  np.size(det_y_crop),  # DetectorsDimH # detector dimension (horizontal)
+                    DetectorsDimV = 100,  # DetectorsDimV # detector dimension (vertical) for 3D case only
+                    AnglesVec = angles_rad, # array of angles in radians
+                    ObjSize = N_size, # a scalar to define reconstructed object dimensions
+                    datafidelity='LS',# data fidelity, choose LS, PWLS (wip), GH (wip), Student (wip)
+                    nonnegativity='ENABLE', # enable nonnegativity constraint (set to 'ENABLE')
+                    OS_number = None, # the number of subsets, NONE/(or > 1) ~ classical / ordered subsets
+                    tolerance = 1e-08, # tolerance to stop outer iterations earlier
+                    device='gpu')
+#%%
+print ("Reconstructing with ADMM method using SB-TV penalty")
+RecADMM_reg_sbtv = RectoolsIR.ADMM(data_norm[0:100,:,det_y_crop],
+                              rho_const = 2000.0, \
+                              iterationsADMM = 15, \
+                              regularisation = 'SB_TV', \
+                              regularisation_parameter = 0.00085,\
+                              regularisation_iterations = 50)
+
+sliceSel = 50
+max_val = 0.003
+plt.figure() 
+plt.subplot(131)
+plt.imshow(RecADMM_reg_sbtv[sliceSel,:,:],vmin=0, vmax=max_val, cmap="gray")
+plt.title('3D ADMM-SB-TV Reconstruction, axial view')
+
+plt.subplot(132)
+plt.imshow(RecADMM_reg_sbtv[:,sliceSel,:],vmin=0, vmax=max_val, cmap="gray")
+plt.title('3D ADMM-SB-TV Reconstruction, coronal view')
+
+plt.subplot(133)
+plt.imshow(RecADMM_reg_sbtv[:,:,sliceSel],vmin=0, vmax=max_val, cmap="gray")
+plt.title('3D ADMM-SB-TV Reconstruction, sagittal view')
+plt.show()
+
+
+# saving to tiffs (16bit)
+"""
+from libtiff import TIFF
+multiplier = (int)(65535/(np.max(RecADMM_reg_sbtv)))
+for i in range(0,np.size(RecADMM_reg_sbtv,0)):
+    tiff = TIFF.open('Dendr_ADMM_SBTV'+'_'+str(i)+'.tiff', mode='w')
+    tiff.write_image(np.uint16(RecADMM_reg_sbtv[i,:,:]*multiplier))
+    tiff.close()
+"""
+# Saving reconstructed data with a unique time label
+np.save('Dendr_ADMM_SBTV'+str(time_label)+'.npy', RecADMM_reg_sbtv)
+del RecADMM_reg_sbtv
+#%%
+print ("Reconstructing with ADMM method using ROF-LLT penalty")
+RecADMM_reg_rofllt = RectoolsIR.ADMM(data_norm[0:100,:,det_y_crop],
+                              rho_const = 2000.0, \
+                              iterationsADMM = 15, \
+                              regularisation = 'LLT_ROF', \
+                              regularisation_parameter = 0.0009,\
+                              regularisation_parameter2 = 0.0007,\
+                              time_marching_parameter = 0.001,\
+                              regularisation_iterations = 550)
+
+sliceSel = 50
+max_val = 0.003
+plt.figure() 
+plt.subplot(131)
+plt.imshow(RecADMM_reg_rofllt[sliceSel,:,:],vmin=0, vmax=max_val)
+plt.title('3D ADMM-ROFLLT Reconstruction, axial view')
+
+plt.subplot(132)
+plt.imshow(RecADMM_reg_rofllt[:,sliceSel,:],vmin=0, vmax=max_val)
+plt.title('3D ADMM-ROFLLT Reconstruction, coronal view')
+
+plt.subplot(133)
+plt.imshow(RecADMM_reg_rofllt[:,:,sliceSel],vmin=0, vmax=max_val)
+plt.title('3D ADMM-ROFLLT Reconstruction, sagittal view')
+plt.show()
+
+# saving to tiffs (16bit)
+"""
+from libtiff import TIFF
+multiplier = (int)(65535/(np.max(RecADMM_reg_rofllt)))
+for i in range(0,np.size(RecADMM_reg_rofllt,0)):
+    tiff = TIFF.open('Dendr_ADMM_ROFLLT'+'_'+str(i)+'.tiff', mode='w')
+    tiff.write_image(np.uint16(RecADMM_reg_rofllt[i,:,:]*multiplier))
+    tiff.close()
+"""
+
+# Saving reconstructed data with a unique time label
+np.save('Dendr_ADMM_ROFLLT'+str(time_label)+'.npy', RecADMM_reg_rofllt)
+del RecADMM_reg_rofllt
+#%%
+print ("Reconstructing with ADMM method using TGV penalty")
+RecADMM_reg_tgv = RectoolsIR.ADMM(data_norm[0:100,:,det_y_crop],
+                              rho_const = 2000.0, \
+                              iterationsADMM = 15, \
+                              regularisation = 'TGV', \
+                              regularisation_parameter = 0.01,\
+                              regularisation_iterations = 500)
+
+sliceSel = 50
+max_val = 0.003
+plt.figure() 
+plt.subplot(131)
+plt.imshow(RecADMM_reg_tgv[sliceSel,:,:],vmin=0, vmax=max_val)
+plt.title('3D ADMM-TGV Reconstruction, axial view')
+
+plt.subplot(132)
+plt.imshow(RecADMM_reg_tgv[:,sliceSel,:],vmin=0, vmax=max_val)
+plt.title('3D ADMM-TGV Reconstruction, coronal view')
+
+plt.subplot(133)
+plt.imshow(RecADMM_reg_tgv[:,:,sliceSel],vmin=0, vmax=max_val)
+plt.title('3D ADMM-TGV Reconstruction, sagittal view')
+plt.show()
+
+# saving to tiffs (16bit)
+"""
+from libtiff import TIFF
+multiplier = (int)(65535/(np.max(RecADMM_reg_tgv)))
+for i in range(0,np.size(RecADMM_reg_tgv,0)):
+    tiff = TIFF.open('Dendr_ADMM_TGV'+'_'+str(i)+'.tiff', mode='w')
+    tiff.write_image(np.uint16(RecADMM_reg_tgv[i,:,:]*multiplier))
+    tiff.close()
+"""
+# Saving reconstructed data with a unique time label
+np.save('Dendr_ADMM_TGV'+str(time_label)+'.npy', RecADMM_reg_tgv)
+del RecADMM_reg_tgv
+#%%
\ No newline at end of file
diff --git a/demos/SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py b/demos/SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py
new file mode 100644
index 0000000..59ffc0e
--- /dev/null
+++ b/demos/SoftwareX_supp/Demo_SimulData_ParOptimis_SX.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+These demo scripts support the following publication:
+"CCPi-Regularisation Toolkit for computed tomographic image reconstruction with 
+proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner,
+ Philip J. Withers; Software X, 2019
+____________________________________________________________________________
+* Reads data which was previously generated by TomoPhantom software (Zenodo link)
+--- https://doi.org/10.5281/zenodo.2578893
+* Optimises the regularisation parameters which are later used in the script:
+Demo_SimulData_Recon_SX.py
+____________________________________________________________________________
+>>>>> Dependencies: <<<<<
+>>>>> Dependencies: <<<<<
+1. ASTRA toolbox: conda install -c astra-toolbox astra-toolbox
+2. TomoRec: conda install -c dkazanc tomorec
+or install from https://github.com/dkazanc/TomoRec
+
+@author: Daniil Kazantsev, e-mail: daniil.kazantsev@diamond.ac.uk
+GPLv3 license (ASTRA toolbox)
+"""
+#import timeit
+import matplotlib.pyplot as plt
+import numpy as np
+import h5py
+from ccpi.supp.qualitymetrics import QualityTools
+
+# loading the data 
+h5f = h5py.File('data/TomoSim_data1550671417.h5','r')
+phantom = h5f['phantom'][:]
+projdata_norm = h5f['projdata_norm'][:]
+proj_angles = h5f['proj_angles'][:]
+h5f.close()
+
+[Vert_det, AnglesNum, Horiz_det] = np.shape(projdata_norm)
+N_size = Vert_det
+
+sliceSel = 128
+#plt.gray()
+plt.figure() 
+plt.subplot(131)
+plt.imshow(phantom[sliceSel,:,:],vmin=0, vmax=1)
+plt.title('3D Phantom, axial view')
+
+plt.subplot(132)
+plt.imshow(phantom[:,sliceSel,:],vmin=0, vmax=1)
+plt.title('3D Phantom, coronal view')
+
+plt.subplot(133)
+plt.imshow(phantom[:,:,sliceSel],vmin=0, vmax=1)
+plt.title('3D Phantom, sagittal view')
+plt.show()
+
+intens_max = 240
+plt.figure() 
+plt.subplot(131)
+plt.imshow(projdata_norm[:,sliceSel,:],vmin=0, vmax=intens_max)
+plt.title('2D Projection (erroneous)')
+plt.subplot(132)
+plt.imshow(projdata_norm[sliceSel,:,:],vmin=0, vmax=intens_max)
+plt.title('Sinogram view')
+plt.subplot(133)
+plt.imshow(projdata_norm[:,:,sliceSel],vmin=0, vmax=intens_max)
+plt.title('Tangentogram view')
+plt.show()
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("Reconstructing with ADMM method using TomoRec software")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+# initialise TomoRec ITERATIVE reconstruction class ONCE
+from tomorec.methodsIR import RecToolsIR
+RectoolsIR = RecToolsIR(DetectorsDimH = Horiz_det,  # DetectorsDimH # detector dimension (horizontal)
+                    DetectorsDimV = Vert_det,  # DetectorsDimV # detector dimension (vertical) for 3D case only
+                    AnglesVec = proj_angles, # array of angles in radians
+                    ObjSize = N_size, # a scalar to define reconstructed object dimensions
+                    datafidelity='LS',# data fidelity, choose LS, PWLS (wip), GH (wip), Student (wip)
+                    nonnegativity='ENABLE', # enable nonnegativity constraint (set to 'ENABLE')
+                    OS_number = None, # the number of subsets, NONE/(or > 1) ~ classical / ordered subsets
+                    tolerance = 1e-08, # tolerance to stop outer iterations earlier
+                    device='gpu')
+#%%
+param_space = 30
+reg_param_sb_vec = np.linspace(0.03,0.15,param_space,dtype='float32') # a vector of parameters
+erros_vec_sbtv = np.zeros((param_space)) # a vector of errors
+
+print ("Reconstructing with ADMM method using SB-TV penalty")
+for i in range(0,param_space):
+    RecADMM_reg_sbtv = RectoolsIR.ADMM(projdata_norm,
+                                  rho_const = 2000.0, \
+                                  iterationsADMM = 15, \
+                                  regularisation = 'SB_TV', \
+                                  regularisation_parameter = reg_param_sb_vec[i],\
+                                  regularisation_iterations = 50)
+    # calculate errors 
+    Qtools = QualityTools(phantom, RecADMM_reg_sbtv)
+    erros_vec_sbtv[i] = Qtools.rmse()
+    print("RMSE for regularisation parameter {} for ADMM-SB-TV is {}".format(reg_param_sb_vec[i],erros_vec_sbtv[i]))
+
+plt.figure() 
+plt.plot(erros_vec_sbtv)
+
+# Saving the tested parameters and their RMSE values (fixed file name, no time label)
+h5f = h5py.File('Optim_admm_sbtv.h5', 'w')
+h5f.create_dataset('reg_param_sb_vec', data=reg_param_sb_vec)
+h5f.create_dataset('erros_vec_sbtv', data=erros_vec_sbtv)
+h5f.close()
+#%%
+param_space = 30
+reg_param_rofllt_vec = np.linspace(0.03,0.15,param_space,dtype='float32') # a vector of parameters
+erros_vec_rofllt = np.zeros((param_space)) # a vector of errors
+
+print ("Reconstructing with ADMM method using ROF-LLT penalty")
+for i in range(0,param_space):
+    RecADMM_reg_rofllt = RectoolsIR.ADMM(projdata_norm,
+                                  rho_const = 2000.0, \
+                                  iterationsADMM = 15, \
+                                  regularisation = 'LLT_ROF', \
+                                  regularisation_parameter = reg_param_rofllt_vec[i],\
+                                  regularisation_parameter2 = 0.005,\
+                                  regularisation_iterations = 600)
+    # calculate errors 
+    Qtools = QualityTools(phantom, RecADMM_reg_rofllt)
+    erros_vec_rofllt[i] = Qtools.rmse()
+    print("RMSE for regularisation parameter {} for ADMM-ROF-LLT is {}".format(reg_param_rofllt_vec[i],erros_vec_rofllt[i]))
+
+plt.figure() 
+plt.plot(erros_vec_rofllt)
+
+# Saving the tested parameters and their RMSE values (fixed file name, no time label)
+h5f = h5py.File('Optim_admm_rofllt.h5', 'w')
+h5f.create_dataset('reg_param_rofllt_vec', data=reg_param_rofllt_vec)
+h5f.create_dataset('erros_vec_rofllt', data=erros_vec_rofllt)
+h5f.close()
+#%%
+param_space = 30
+reg_param_tgv_vec = np.linspace(0.03,0.15,param_space,dtype='float32') # a vector of parameters
+erros_vec_tgv = np.zeros((param_space)) # a vector of errors
+
+print ("Reconstructing with ADMM method using TGV penalty")
+for i in range(0,param_space):
+    RecADMM_reg_tgv = RectoolsIR.ADMM(projdata_norm,
+                                  rho_const = 2000.0, \
+                                  iterationsADMM = 15, \
+                                  regularisation = 'TGV', \
+                                  regularisation_parameter = reg_param_tgv_vec[i],\
+                                  regularisation_iterations = 600)
+    # calculate errors 
+    Qtools = QualityTools(phantom, RecADMM_reg_tgv)
+    erros_vec_tgv[i] = Qtools.rmse()
+    print("RMSE for regularisation parameter {} for ADMM-TGV is {}".format(reg_param_tgv_vec[i],erros_vec_tgv[i]))
+
+plt.figure() 
+plt.plot(erros_vec_tgv)
+
+# Saving the tested parameters and their RMSE values (fixed file name, no time label)
+h5f = h5py.File('Optim_admm_tgv.h5', 'w')
+h5f.create_dataset('reg_param_tgv_vec', data=reg_param_tgv_vec)
+h5f.create_dataset('erros_vec_tgv', data=erros_vec_tgv)
+h5f.close()
+#%%
\ No newline at end of file
diff --git a/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py b/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py
new file mode 100644
index 0000000..93b0cef
--- /dev/null
+++ b/demos/SoftwareX_supp/Demo_SimulData_Recon_SX.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+These demo scripts support the following publication:
+"CCPi-Regularisation Toolkit for computed tomographic image reconstruction with 
+proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner,
+ Philip J. Withers; Software X, 2019
+____________________________________________________________________________
+* Reads data which is previously generated by TomoPhantom software (Zenodo link)
+--- https://doi.org/10.5281/zenodo.2578893
+* Reconstruct using optimised regularisation parameters (see Demo_SimulData_ParOptimis_SX.py)
+____________________________________________________________________________
+>>>>> Dependencies: <<<<<
+1. ASTRA toolbox: conda install -c astra-toolbox astra-toolbox
+2. TomoRec: conda install -c dkazanc tomorec
+or install from https://github.com/dkazanc/TomoRec
+
+@author: Daniil Kazantsev, e-mail: daniil.kazantsev@diamond.ac.uk
+GPLv3 license (ASTRA toolbox)
+"""
+#import timeit
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+import numpy as np
+import h5py
+from ccpi.supp.qualitymetrics import QualityTools
+from scipy.signal.windows import gaussian # window functions live in scipy.signal.windows; the top-level scipy.signal alias is deprecated
+
+# loading the data 
+h5f = h5py.File('data/TomoSim_data1550671417.h5','r')
+phantom = h5f['phantom'][:]
+projdata_norm = h5f['projdata_norm'][:]
+proj_angles = h5f['proj_angles'][:]
+h5f.close()
+
+[Vert_det, AnglesNum, Horiz_det] = np.shape(projdata_norm)
+N_size = Vert_det
+
+# loading optimisation parameters (the result of running Demo_SimulData_ParOptimis_SX)
+h5f = h5py.File('optim_param/Optim_admm_sbtv.h5','r')
+reg_param_sb_vec = h5f['reg_param_sb_vec'][:]
+erros_vec_sbtv = h5f['erros_vec_sbtv'][:]
+h5f.close()
+
+h5f = h5py.File('optim_param/Optim_admm_rofllt.h5','r')
+reg_param_rofllt_vec = h5f['reg_param_rofllt_vec'][:]
+erros_vec_rofllt = h5f['erros_vec_rofllt'][:]
+h5f.close()
+
+h5f = h5py.File('optim_param/Optim_admm_tgv.h5','r')
+reg_param_tgv_vec = h5f['reg_param_tgv_vec'][:]
+erros_vec_tgv = h5f['erros_vec_tgv'][:]
+h5f.close()
+# index of the smallest RMSE for each penalty (np.argmin returns the first minimum; xrange is not available in Python 3)
+index_minSBTV = int(np.argmin(erros_vec_sbtv))
+index_minROFLLT = int(np.argmin(erros_vec_rofllt))
+index_minTGV = int(np.argmin(erros_vec_tgv))
+# assign optimal regularisation parameters:
+optimReg_sbtv = reg_param_sb_vec[index_minSBTV]
+optimReg_rofllt = reg_param_rofllt_vec[index_minROFLLT]
+optimReg_tgv = reg_param_tgv_vec[index_minTGV]
+#%%
+# plot loaded data
+sliceSel = 128
+#plt.figure() 
+fig, (ax1, ax2) = plt.subplots(figsize=(15, 5), ncols=2)
+plt.rcParams.update({'xtick.labelsize': 'x-small'})
+plt.rcParams.update({'ytick.labelsize':'x-small'})
+plt.subplot(121)
+one = plt.imshow(phantom[sliceSel,:,:],vmin=0, vmax=1, interpolation='none', cmap="PuOr")
+fig.colorbar(one, ax=ax1)
+plt.title('3D Phantom, axial (X-Y) view')
+plt.subplot(122)
+two = plt.imshow(phantom[:,sliceSel,:],vmin=0, vmax=1,interpolation='none', cmap="PuOr")
+fig.colorbar(two, ax=ax2)
+plt.title('3D Phantom, coronal (Y-Z) view')
+"""
+plt.subplot(133)
+plt.imshow(phantom[:,:,sliceSel],vmin=0, vmax=1, cmap="PuOr")
+plt.title('3D Phantom, sagittal view')
+
+"""
+plt.show()
+#%%
+intens_max = 220
+plt.figure() 
+plt.rcParams.update({'xtick.labelsize': 'x-small'})
+plt.rcParams.update({'ytick.labelsize':'x-small'})
+plt.subplot(131)
+plt.imshow(projdata_norm[:,sliceSel,:],vmin=0, vmax=intens_max, cmap="PuOr")
+plt.xlabel('X-detector', fontsize=16)
+plt.ylabel('Z-detector', fontsize=16)
+plt.title('2D Projection (X-Z) view', fontsize=19)
+plt.subplot(132)
+plt.imshow(projdata_norm[sliceSel,:,:],vmin=0, vmax=intens_max, cmap="PuOr")
+plt.xlabel('X-detector', fontsize=16)
+plt.ylabel('Projection angle', fontsize=16)
+plt.title('Sinogram (X-Y) view', fontsize=19)
+plt.subplot(133)
+plt.imshow(projdata_norm[:,:,sliceSel],vmin=0, vmax=intens_max, cmap="PuOr")
+plt.xlabel('Projection angle', fontsize=16)
+plt.ylabel('Z-detector', fontsize=16)
+plt.title('Vertical (Y-Z) view', fontsize=19)
+plt.show()
+#plt.savefig('projdata.pdf', format='pdf', dpi=1200)
+#%%
+# initialise TomoRec DIRECT reconstruction class ONCE
+from tomorec.methodsDIR import RecToolsDIR
+RectoolsDIR = RecToolsDIR(DetectorsDimH = Horiz_det,  # DetectorsDimH # detector dimension (horizontal)
+                    DetectorsDimV = Vert_det,  # DetectorsDimV # detector dimension (vertical) for 3D case only
+                    AnglesVec = proj_angles, # array of angles in radians
+                    ObjSize = N_size, # a scalar to define reconstructed object dimensions
+                    device = 'gpu')
+#%%
+print ("Reconstruction using FBP from TomoRec")
+recFBP= RectoolsDIR.FBP(projdata_norm) # FBP reconstruction
+#%%
+x0, y0 = 0, 127 # These are in _pixel_ coordinates!!
+x1, y1 = 255, 127
+
+sliceSel = int(0.5*N_size)
+max_val = 1
+plt.figure(figsize = (20,5))
+gs1 = gridspec.GridSpec(1, 3)
+gs1.update(wspace=0.1, hspace=0.05) # set the spacing between axes. 
+ax1 = plt.subplot(gs1[0])
+plt.imshow(recFBP[sliceSel,:,:],vmin=0, vmax=max_val, cmap="PuOr")
+ax1.plot([x0, x1], [y0, y1], 'ko-', linestyle='--')
+plt.colorbar(ax=ax1)
+plt.title('FBP Reconstruction, axial (X-Y) view', fontsize=19)
+ax1.set_aspect('equal')
+ax3 = plt.subplot(gs1[1])
+plt.plot(phantom[sliceSel,sliceSel,0:N_size],color='k',linewidth=2)
+plt.plot(recFBP[sliceSel,sliceSel,0:N_size],linestyle='--',color='g')
+plt.title('Profile', fontsize=19)
+ax2 = plt.subplot(gs1[2])
+plt.imshow(recFBP[:,sliceSel,:],vmin=0, vmax=max_val, cmap="PuOr")
+plt.title('FBP Reconstruction, coronal (Y-Z) view', fontsize=19)
+ax2.set_aspect('equal')
+plt.show()
+#plt.savefig('FBP_phantom.pdf', format='pdf', dpi=1600)
+
+# calculate errors 
+Qtools = QualityTools(phantom, recFBP)
+RMSE_fbp = Qtools.rmse()
+print("Root Mean Square Error for FBP is {}".format(RMSE_fbp))
+
+# SSIM measure
+Qtools = QualityTools(phantom[128,:,:]*255, recFBP[128,:,:]*235)
+win = np.array([gaussian(11, 1.5)])
+win2d = win * (win.T)
+ssim_fbp = Qtools.ssim(win2d)
+print("Mean SSIM for FBP is {}".format(ssim_fbp[0]))
+#%%
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("Reconstructing with ADMM method using TomoRec software")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+# initialise TomoRec ITERATIVE reconstruction class ONCE
+from tomorec.methodsIR import RecToolsIR
+RectoolsIR = RecToolsIR(DetectorsDimH = Horiz_det,  # DetectorsDimH # detector dimension (horizontal)
+                    DetectorsDimV = Vert_det,  # DetectorsDimV # detector dimension (vertical) for 3D case only
+                    AnglesVec = proj_angles, # array of angles in radians
+                    ObjSize = N_size, # a scalar to define reconstructed object dimensions
+                    datafidelity='LS',# data fidelity, choose LS, PWLS (wip), GH (wip), Student (wip)
+                    nonnegativity='ENABLE', # enable nonnegativity constraint (set to 'ENABLE')
+                    OS_number = None, # the number of subsets, NONE/(or > 1) ~ classical / ordered subsets
+                    tolerance = 1e-08, # tolerance to stop outer iterations earlier
+                    device='gpu')
+#%%
+print ("Reconstructing with ADMM method using SB-TV penalty")
+RecADMM_reg_sbtv = RectoolsIR.ADMM(projdata_norm,
+                                  rho_const = 2000.0, \
+                                  iterationsADMM = 25, \
+                                  regularisation = 'SB_TV', \
+                                  regularisation_parameter = optimReg_sbtv,\
+                                  regularisation_iterations = 50)
+
+sliceSel = int(0.5*N_size)
+max_val = 1
+plt.figure(figsize = (20,3))
+gs1 = gridspec.GridSpec(1, 4)
+gs1.update(wspace=0.02, hspace=0.01) # set the spacing between axes. 
+ax1 = plt.subplot(gs1[0])
+plt.plot(reg_param_sb_vec, erros_vec_sbtv, color='k',linewidth=2)
+plt.xlabel('Regularisation parameter', fontsize=16)
+plt.ylabel('RMSE value', fontsize=16)
+plt.title('Regularisation selection', fontsize=19)
+ax2 = plt.subplot(gs1[1])
+plt.imshow(RecADMM_reg_sbtv[sliceSel,:,:],vmin=0, vmax=max_val, cmap="PuOr")
+ax2.plot([x0, x1], [y0, y1], 'ko-', linestyle='--')
+plt.title('ADMM-SBTV (X-Y) view', fontsize=19)
+#ax2.set_aspect('equal')
+ax3 = plt.subplot(gs1[2])
+plt.plot(phantom[sliceSel,sliceSel,0:N_size],color='k',linewidth=2)
+plt.plot(RecADMM_reg_sbtv[sliceSel,sliceSel,0:N_size],linestyle='--',color='g')
+plt.title('Profile', fontsize=19)
+ax4 = plt.subplot(gs1[3])
+plt.imshow(RecADMM_reg_sbtv[:,sliceSel,:],vmin=0, vmax=max_val, cmap="PuOr")
+plt.title('ADMM-SBTV (Y-Z) view', fontsize=19)
+plt.colorbar(ax=ax4)
+plt.savefig('SBTV_phantom.pdf', format='pdf', dpi=1600) # save BEFORE show(): afterwards the figure may already be closed, giving an empty file
+plt.show()
+
+# calculate errors 
+Qtools = QualityTools(phantom, RecADMM_reg_sbtv)
+RMSE_admm_sbtv = Qtools.rmse()
+print("Root Mean Square Error for ADMM-SB-TV is {}".format(RMSE_admm_sbtv))
+
+# SSIM measure
+Qtools = QualityTools(phantom[128,:,:]*255, RecADMM_reg_sbtv[128,:,:]*235)
+win = np.array([gaussian(11, 1.5)])
+win2d = win * (win.T)
+ssim_admm_sbtv = Qtools.ssim(win2d)
+print("Mean SSIM ADMM-SBTV is {}".format(ssim_admm_sbtv[0]))
+#%%
+print ("Reconstructing with ADMM method using ROFLLT penalty")
+RecADMM_reg_rofllt = RectoolsIR.ADMM(projdata_norm,
+                                  rho_const = 2000.0, \
+                                  iterationsADMM = 25, \
+                                  regularisation = 'LLT_ROF', \
+                                  regularisation_parameter = optimReg_rofllt,\
+                                  regularisation_parameter2 = 0.0085,\
+                                  regularisation_iterations = 600)
+
+sliceSel = int(0.5*N_size)
+max_val = 1
+plt.figure(figsize = (20,3))
+gs1 = gridspec.GridSpec(1, 4)
+gs1.update(wspace=0.02, hspace=0.01) # set the spacing between axes. 
+ax1 = plt.subplot(gs1[0])
+plt.plot(reg_param_rofllt_vec, erros_vec_rofllt, color='k',linewidth=2)
+plt.xlabel('Regularisation parameter', fontsize=16)
+plt.ylabel('RMSE value', fontsize=16)
+plt.title('Regularisation selection', fontsize=19)
+ax2 = plt.subplot(gs1[1])
+plt.imshow(RecADMM_reg_rofllt[sliceSel,:,:],vmin=0, vmax=max_val, cmap="PuOr")
+ax2.plot([x0, x1], [y0, y1], 'ko-', linestyle='--')
+plt.title('ADMM-ROFLLT (X-Y) view', fontsize=19)
+#ax2.set_aspect('equal')
+ax3 = plt.subplot(gs1[2])
+plt.plot(phantom[sliceSel,sliceSel,0:N_size],color='k',linewidth=2)
+plt.plot(RecADMM_reg_rofllt[sliceSel,sliceSel,0:N_size],linestyle='--',color='g')
+plt.title('Profile', fontsize=19)
+ax4 = plt.subplot(gs1[3])
+plt.imshow(RecADMM_reg_rofllt[:,sliceSel,:],vmin=0, vmax=max_val, cmap="PuOr")
+plt.title('ADMM-ROFLLT (Y-Z) view', fontsize=19)
+plt.colorbar(ax=ax4)
+plt.show()
+#plt.savefig('ROFLLT_phantom.pdf', format='pdf', dpi=1600)
+
+# calculate errors 
+Qtools = QualityTools(phantom, RecADMM_reg_rofllt)
+RMSE_admm_rofllt = Qtools.rmse()
+print("Root Mean Square Error for ADMM-ROF-LLT is {}".format(RMSE_admm_rofllt))
+
+# SSIM measure (the 2D window is the product of a 1D Gaussian row with its transpose)
+Qtools = QualityTools(phantom[128,:,:]*255, RecADMM_reg_rofllt[128,:,:]*235)
+win = np.array([gaussian(11, 1.5)])
+win2d = win * (win.T)
+ssim_admm_rofllt = Qtools.ssim(win2d)
+print("Mean SSIM ADMM-ROFLLT is {}".format(ssim_admm_rofllt[0]))
+#%%
+print ("Reconstructing with ADMM method using TGV penalty")
+RecADMM_reg_tgv = RectoolsIR.ADMM(projdata_norm,
+                                  rho_const = 2000.0, \
+                                  iterationsADMM = 25, \
+                                  regularisation = 'TGV', \
+                                  regularisation_parameter = optimReg_tgv,\
+                                  regularisation_iterations = 600)
+#%%
+sliceSel = int(0.5*N_size)
+max_val = 1
+plt.figure(figsize = (20,3))
+gs1 = gridspec.GridSpec(1, 4)
+gs1.update(wspace=0.02, hspace=0.01) # set the spacing between axes. 
+ax1 = plt.subplot(gs1[0])
+plt.plot(reg_param_tgv_vec, erros_vec_tgv, color='k',linewidth=2)
+plt.xlabel('Regularisation parameter', fontsize=16)
+plt.ylabel('RMSE value', fontsize=16)
+plt.title('Regularisation selection', fontsize=19)
+ax2 = plt.subplot(gs1[1])
+plt.imshow(RecADMM_reg_tgv[sliceSel,:,:],vmin=0, vmax=max_val, cmap="PuOr")
+ax2.plot([x0, x1], [y0, y1], 'ko-', linestyle='--')
+plt.title('ADMM-TGV (X-Y) view', fontsize=19)
+#ax2.set_aspect('equal')
+ax3 = plt.subplot(gs1[2])
+plt.plot(phantom[sliceSel,sliceSel,0:N_size],color='k',linewidth=2)
+plt.plot(RecADMM_reg_tgv[sliceSel,sliceSel,0:N_size],linestyle='--',color='g')
+plt.title('Profile', fontsize=19)
+ax4 = plt.subplot(gs1[3])
+plt.imshow(RecADMM_reg_tgv[:,sliceSel,:],vmin=0, vmax=max_val, cmap="PuOr")
+plt.title('ADMM-TGV (Y-Z) view', fontsize=19)
+plt.colorbar(ax=ax4)
+plt.show()
+#plt.savefig('TGV_phantom.pdf', format='pdf', dpi=1600)
+
+# calculate errors 
+Qtools = QualityTools(phantom, RecADMM_reg_tgv)
+RMSE_admm_tgv = Qtools.rmse()
+print("Root Mean Square Error for ADMM-TGV is {}".format(RMSE_admm_tgv))
+
+# SSIM measure
+#Create a 2d gaussian for the window parameter
+Qtools = QualityTools(phantom[128,:,:]*255, RecADMM_reg_tgv[128,:,:]*235)
+win = np.array([gaussian(11, 1.5)])
+win2d = win * (win.T)
+ssim_admm_tgv = Qtools.ssim(win2d)
+print("Mean SSIM ADMM-TGV is {}".format(ssim_admm_tgv[0]))
+#%%
\ No newline at end of file
diff --git a/demos/SoftwareX_supp/Demo_SimulData_SX.py b/demos/SoftwareX_supp/Demo_SimulData_SX.py
new file mode 100644
index 0000000..cdf4325
--- /dev/null
+++ b/demos/SoftwareX_supp/Demo_SimulData_SX.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+These demo scripts support the following publication: 
+"CCPi-Regularisation Toolkit for computed tomographic image reconstruction with 
+proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner,
+ Philip J. Withers; Software X, 2019
+____________________________________________________________________________
+* Runs TomoPhantom software to simulate tomographic projection data with
+some imaging errors and noise
+* Saves the data into an HDF5 file to be loaded by the reconstruction scripts
+__________________________________________________________________________
+
+>>>>> Dependencies: <<<<<
+1. TomoPhantom software for phantom and data generation
+
+@author: Daniil Kazantsev, e-mail: daniil.kazantsev@diamond.ac.uk
+Apache 2.0 license
+"""
+import timeit
+import os
+import matplotlib.pyplot as plt
+import numpy as np
+import tomophantom
+from tomophantom import TomoP3D
+from tomophantom.supp.flatsgen import flats
+from tomophantom.supp.normraw import normaliser_sim
+
+print ("Building 3D phantom using TomoPhantom software")
+tic=timeit.default_timer()
+model = 16 # select a model number from the library
+N_size = 256 # Define phantom dimensions using a scalar value (cubic phantom)
+path = os.path.dirname(tomophantom.__file__)
+path_library3D = os.path.join(path, "Phantom3DLibrary.dat")
+#This will generate a N_size x N_size x N_size phantom (3D)
+phantom_tm = TomoP3D.Model(model, N_size, path_library3D)
+toc=timeit.default_timer()
+Run_time = toc - tic
+print("Phantom has been built in {} seconds".format(Run_time))
+
+sliceSel = int(0.5*N_size)
+#plt.gray()
+plt.figure() 
+plt.subplot(131)
+plt.imshow(phantom_tm[sliceSel,:,:],vmin=0, vmax=1)
+plt.title('3D Phantom, axial view')
+
+plt.subplot(132)
+plt.imshow(phantom_tm[:,sliceSel,:],vmin=0, vmax=1)
+plt.title('3D Phantom, coronal view')
+
+plt.subplot(133)
+plt.imshow(phantom_tm[:,:,sliceSel],vmin=0, vmax=1)
+plt.title('3D Phantom, sagittal view')
+plt.show()
+
+# Projection geometry related parameters:
+Horiz_det = int(np.sqrt(2)*N_size) # detector column count (horizontal)
+Vert_det = N_size # detector row count (vertical) (no reason for it to be > N)
+angles_num = int(0.35*np.pi*N_size); # angles number
+angles = np.linspace(0.0,179.9,angles_num,dtype='float32') # in degrees
+angles_rad = angles*(np.pi/180.0)
+#%%
+print ("Building 3D analytical projection data with TomoPhantom")
+projData3D_analyt= TomoP3D.ModelSino(model, N_size, Horiz_det, Vert_det, angles, path_library3D)
+
+intens_max = N_size
+sliceSel = int(0.5*N_size)
+plt.figure() 
+plt.subplot(131)
+plt.imshow(projData3D_analyt[:,sliceSel,:],vmin=0, vmax=intens_max)
+plt.title('2D Projection (analytical)')
+plt.subplot(132)
+plt.imshow(projData3D_analyt[sliceSel,:,:],vmin=0, vmax=intens_max)
+plt.title('Sinogram view')
+plt.subplot(133)
+plt.imshow(projData3D_analyt[:,:,sliceSel],vmin=0, vmax=intens_max)
+plt.title('Tangentogram view')
+plt.show()
+#%%
+print ("Simulate flat fields, add noise and normalise projections...")
+flatsnum = 20 # generate 20 flat fields
+flatsSIM = flats(Vert_det, Horiz_det, maxheight = 0.1, maxthickness = 3, sigma_noise = 0.2, sigmasmooth = 3, flatsnum=flatsnum)
+
+plt.figure() 
+plt.imshow(flatsSIM[0,:,:],vmin=0, vmax=1)
+plt.title('A selected simulated flat-field')
+#%%
+# Apply normalisation of data and add noise
+flux_intensity = 60000 # controls the level of noise 
+sigma_flats = 0.01 # controls the level of noise in flats (higher creates more ring artifacts)
+projData3D_norm = normaliser_sim(projData3D_analyt, flatsSIM, sigma_flats, flux_intensity)
+
+intens_max = N_size
+sliceSel = int(0.5*N_size)
+plt.figure() 
+plt.subplot(131)
+plt.imshow(projData3D_norm[:,sliceSel,:],vmin=0, vmax=intens_max)
+plt.title('2D Projection (erroneous)')
+plt.subplot(132)
+plt.imshow(projData3D_norm[sliceSel,:,:],vmin=0, vmax=intens_max)
+plt.title('Sinogram view')
+plt.subplot(133)
+plt.imshow(projData3D_norm[:,:,sliceSel],vmin=0, vmax=intens_max)
+plt.title('Tangentogram view')
+plt.show()
+#%%
+import h5py
+import time
+time_label = int(time.time())
+# Saving generated data with a unique time label
+h5f = h5py.File('TomoSim_data'+str(time_label)+'.h5', 'w')
+h5f.create_dataset('phantom', data=phantom_tm)
+h5f.create_dataset('projdata_norm', data=projData3D_norm)
+h5f.create_dataset('proj_angles', data=angles_rad)
+h5f.close()
+#%%
\ No newline at end of file
diff --git a/demos/SoftwareX_supp/Readme.md b/demos/SoftwareX_supp/Readme.md
new file mode 100644
index 0000000..54e83f1
--- /dev/null
+++ b/demos/SoftwareX_supp/Readme.md
@@ -0,0 +1,26 @@
+
+# SoftwareX publication [1] supporting files
+
+## Description:
+The scripts here support the publication in the SoftwareX journal [1] to ensure reproducibility of the research. The scripts are linked with the data shared at Zenodo. 
+
+## Data:
+Data is shared at Zenodo [here](https://doi.org/10.5281/zenodo.2578893)
+
+## Dependencies:
+1. [ASTRA toolbox](https://github.com/astra-toolbox/astra-toolbox): `conda install -c astra-toolbox astra-toolbox`
+2. [TomoRec](https://github.com/dkazanc/TomoRec): `conda install -c dkazanc tomorec`
+3. [Tomophantom](https://github.com/dkazanc/TomoPhantom): `conda install tomophantom -c ccpi`
+
+## Files description: 
+- `Demo_SimulData_SX.py` - simulates 3D projection data using [Tomophantom](https://github.com/dkazanc/TomoPhantom) software. One can skip this module if the data is taken from [Zenodo](https://doi.org/10.5281/zenodo.2578893)
+- `Demo_SimulData_ParOptimis_SX.py` - runs computationally intensive calculations for optimal regularisation parameters; the results are saved into the directory `optim_param`. This script can also be skipped. 
+- `Demo_SimulData_Recon_SX.py` - using established regularisation parameters, one runs iterative reconstruction
+- `Demo_RealData_Recon_SX.py` - runs real data reconstructions. This can be quite memory-intensive, so reduce the size of the reconstructed volume if needed. 
+
+### References:
+[1] "CCPi-Regularisation Toolkit for computed tomographic image reconstruction with proximal splitting algorithms" by Daniil Kazantsev, Edoardo Pasca, Martin J. Turner and Philip J. Withers; SoftwareX, 2019. 
+
+### Acknowledgments:
+CCPi-RGL software is a product of the [CCPi](https://www.ccpi.ac.uk/) group, STFC SCD software developers and Diamond Light Source (DLS). Any relevant questions/comments can be e-mailed to Daniil Kazantsev at dkazanc@hotmail.com
+
diff --git a/demos/SoftwareX_supp/optim_param/Optim_admm_rofllt.h5 b/demos/SoftwareX_supp/optim_param/Optim_admm_rofllt.h5
new file mode 100644
index 0000000..63bc4fd
Binary files /dev/null and b/demos/SoftwareX_supp/optim_param/Optim_admm_rofllt.h5 differ
diff --git a/demos/SoftwareX_supp/optim_param/Optim_admm_sbtv.h5 b/demos/SoftwareX_supp/optim_param/Optim_admm_sbtv.h5
new file mode 100644
index 0000000..03c0c14
Binary files /dev/null and b/demos/SoftwareX_supp/optim_param/Optim_admm_sbtv.h5 differ
diff --git a/demos/SoftwareX_supp/optim_param/Optim_admm_tgv.h5 b/demos/SoftwareX_supp/optim_param/Optim_admm_tgv.h5
new file mode 100644
index 0000000..056d915
Binary files /dev/null and b/demos/SoftwareX_supp/optim_param/Optim_admm_tgv.h5 differ
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 527ad32..6f36906 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -14,6 +14,8 @@ test:
   requires:
     - pillow
     - pillow=4.1.1 # [win]
+#  command:
+#    - unittest -d discover .... ../test
 
 requirements:
   build:
diff --git a/recipe/run_test.py b/recipe/run_test.py
index 21f3216..f551616 100755
--- a/recipe/run_test.py
+++ b/recipe/run_test.py
@@ -815,5 +815,7 @@ class TestRegularisers(unittest.TestCase):
 
         self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance)
 
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_CPU_regularisers.py b/test/test_CPU_regularisers.py
index 6af4cd4..379b989 100644
--- a/test/test_CPU_regularisers.py
+++ b/test/test_CPU_regularisers.py
@@ -2,6 +2,7 @@ import unittest
 import math
 import os
 import timeit
+import numpy as np
 from ccpi.filters.regularisers import FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, Diff4th, ROF_TV
 from testroutines import *
 
-- 
cgit v1.2.3