diff options
26 files changed, 4103 insertions, 45 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6295562..93684f6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -80,6 +80,7 @@ set(ufofilter_SRCS ufo-sliding-stack-task.c ufo-stack-task.c ufo-stacked-backproject-task.c + ufo-stacked-forwardproject-task.c ufo-stdin-task.c ufo-stitch-task.c ufo-tile-task.c diff --git a/src/kernels/CMakeFiles/CMakeDirectoryInformation.cmake b/src/kernels/CMakeFiles/CMakeDirectoryInformation.cmake new file mode 100644 index 0000000..b70123c --- /dev/null +++ b/src/kernels/CMakeFiles/CMakeDirectoryInformation.cmake @@ -0,0 +1,16 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.16 + +# Relative path conversion top directories. +set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/ccpi/repos/ufo-filters") +set(CMAKE_RELATIVE_PATH_TOP_BINARY "/ccpi/repos/ufo-filters") + +# Force unix paths in dependencies. +set(CMAKE_FORCE_UNIX_PATHS 1) + + +# The C and CXX include file regular expressions for this directory. +set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") +set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") +set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) +set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) diff --git a/src/kernels/CMakeFiles/burst.dir/DependInfo.cmake b/src/kernels/CMakeFiles/burst.dir/DependInfo.cmake new file mode 100644 index 0000000..edc37fc --- /dev/null +++ b/src/kernels/CMakeFiles/burst.dir/DependInfo.cmake @@ -0,0 +1,19 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + ) +# The set of files for implicit dependencies of each language: + +# Pairs of files generated by the same build rule. +set(CMAKE_MULTIPLE_OUTPUT_PAIRS + "/ccpi/repos/ufo-filters/src/kernels/center_kernel.cl" "/ccpi/repos/ufo-filters/src/kernels/z_kernel.cl" + "/ccpi/repos/ufo-filters/src/kernels/lamino_kernel.cl" "/ccpi/repos/ufo-filters/src/kernels/z_kernel.cl" + "/ccpi/repos/ufo-filters/src/kernels/roll_kernel.cl" "/ccpi/repos/ufo-filters/src/kernels/z_kernel.cl" + ) + + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/src/kernels/CMakeFiles/burst.dir/build.make b/src/kernels/CMakeFiles/burst.dir/build.make new file mode 100644 index 0000000..19d9eef --- /dev/null +++ b/src/kernels/CMakeFiles/burst.dir/build.make @@ -0,0 +1,105 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.16 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /ccpi/repos/ufo-filters + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /ccpi/repos/ufo-filters + +# Utility rule file for burst. + +# Include the progress variables for this target. +include src/kernels/CMakeFiles/burst.dir/progress.make + +src/kernels/CMakeFiles/burst: src/kernels/z_kernel.cl +src/kernels/CMakeFiles/burst: src/kernels/center_kernel.cl +src/kernels/CMakeFiles/burst: src/kernels/lamino_kernel.cl +src/kernels/CMakeFiles/burst: src/kernels/roll_kernel.cl + + +src/kernels/z_kernel.cl: src/kernels/tools/make_burst_kernels.py +src/kernels/z_kernel.cl: src/kernels/templates/common.in +src/kernels/z_kernel.cl: src/kernels/templates/definitions.in +src/kernels/z_kernel.cl: src/kernels/templates/z_template.in +src/kernels/z_kernel.cl: src/kernels/templates/center_template.in +src/kernels/z_kernel.cl: src/kernels/templates/lamino_template.in +src/kernels/z_kernel.cl: src/kernels/templates/roll_template.in + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/ccpi/repos/ufo-filters/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Generating burst backprojection kernels" + cd /ccpi/repos/ufo-filters/src/kernels && /usr/bin/python /ccpi/repos/ufo-filters/src/kernels/tools/make_burst_kernels.py /ccpi/repos/ufo-filters/src/kernels/templates/z_template.in 1 2 4 8 16 > z_kernel.cl + cd /ccpi/repos/ufo-filters/src/kernels && /usr/bin/python /ccpi/repos/ufo-filters/src/kernels/tools/make_burst_kernels.py /ccpi/repos/ufo-filters/src/kernels/templates/center_template.in 1 2 4 8 16 > center_kernel.cl + cd /ccpi/repos/ufo-filters/src/kernels && /usr/bin/python /ccpi/repos/ufo-filters/src/kernels/tools/make_burst_kernels.py /ccpi/repos/ufo-filters/src/kernels/templates/lamino_template.in 1 2 4 8 16 > lamino_kernel.cl + cd /ccpi/repos/ufo-filters/src/kernels && /usr/bin/python /ccpi/repos/ufo-filters/src/kernels/tools/make_burst_kernels.py /ccpi/repos/ufo-filters/src/kernels/templates/roll_template.in 1 2 4 8 16 > roll_kernel.cl + +src/kernels/center_kernel.cl: src/kernels/z_kernel.cl + @$(CMAKE_COMMAND) -E touch_nocreate src/kernels/center_kernel.cl + +src/kernels/lamino_kernel.cl: src/kernels/z_kernel.cl + @$(CMAKE_COMMAND) -E touch_nocreate src/kernels/lamino_kernel.cl + +src/kernels/roll_kernel.cl: src/kernels/z_kernel.cl + @$(CMAKE_COMMAND) -E touch_nocreate src/kernels/roll_kernel.cl + +burst: src/kernels/CMakeFiles/burst +burst: src/kernels/z_kernel.cl +burst: src/kernels/center_kernel.cl +burst: src/kernels/lamino_kernel.cl +burst: src/kernels/roll_kernel.cl +burst: src/kernels/CMakeFiles/burst.dir/build.make + +.PHONY : burst + +# Rule to build all files generated by this target. +src/kernels/CMakeFiles/burst.dir/build: burst + +.PHONY : src/kernels/CMakeFiles/burst.dir/build + +src/kernels/CMakeFiles/burst.dir/clean: + cd /ccpi/repos/ufo-filters/src/kernels && $(CMAKE_COMMAND) -P CMakeFiles/burst.dir/cmake_clean.cmake +.PHONY : src/kernels/CMakeFiles/burst.dir/clean + +src/kernels/CMakeFiles/burst.dir/depend: + cd /ccpi/repos/ufo-filters && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /ccpi/repos/ufo-filters /ccpi/repos/ufo-filters/src/kernels /ccpi/repos/ufo-filters /ccpi/repos/ufo-filters/src/kernels /ccpi/repos/ufo-filters/src/kernels/CMakeFiles/burst.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : src/kernels/CMakeFiles/burst.dir/depend + diff --git a/src/kernels/CMakeFiles/burst.dir/cmake_clean.cmake b/src/kernels/CMakeFiles/burst.dir/cmake_clean.cmake new file mode 100644 index 0000000..fd937bc --- /dev/null +++ b/src/kernels/CMakeFiles/burst.dir/cmake_clean.cmake @@ -0,0 +1,12 @@ +file(REMOVE_RECURSE + "CMakeFiles/burst" + "center_kernel.cl" + "lamino_kernel.cl" + "roll_kernel.cl" + "z_kernel.cl" +) + +# Per-language clean rules from dependency scanning. +foreach(lang ) + include(CMakeFiles/burst.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/src/kernels/CMakeFiles/burst.dir/depend.internal b/src/kernels/CMakeFiles/burst.dir/depend.internal new file mode 100644 index 0000000..f647855 --- /dev/null +++ b/src/kernels/CMakeFiles/burst.dir/depend.internal @@ -0,0 +1,3 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.16 + diff --git a/src/kernels/CMakeFiles/burst.dir/depend.make b/src/kernels/CMakeFiles/burst.dir/depend.make new file mode 100644 index 0000000..f647855 --- /dev/null +++ b/src/kernels/CMakeFiles/burst.dir/depend.make @@ -0,0 +1,3 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.16 + diff --git a/src/kernels/CMakeFiles/burst.dir/progress.make b/src/kernels/CMakeFiles/burst.dir/progress.make new file mode 100644 index 0000000..225de34 --- /dev/null +++ b/src/kernels/CMakeFiles/burst.dir/progress.make @@ -0,0 +1,2 @@ +CMAKE_PROGRESS_1 = + diff --git a/src/kernels/CMakeFiles/progress.marks b/src/kernels/CMakeFiles/progress.marks new file mode 100644 index 0000000..573541a --- /dev/null +++ b/src/kernels/CMakeFiles/progress.marks @@ -0,0 +1 @@ +0 diff --git a/src/kernels/CTestTestfile.cmake b/src/kernels/CTestTestfile.cmake new file mode 100644 index 0000000..4528b8b --- /dev/null +++ b/src/kernels/CTestTestfile.cmake @@ -0,0 +1,6 @@ +# CMake generated Testfile for +# Source directory: /ccpi/repos/ufo-filters/src/kernels +# Build directory: /ccpi/repos/ufo-filters/src/kernels +# +# This file includes the relevant testing commands required for +# testing this directory and lists subdirectories to be tested as well. diff --git a/src/kernels/Makefile b/src/kernels/Makefile new file mode 100644 index 0000000..a14a6a7 --- /dev/null +++ b/src/kernels/Makefile @@ -0,0 +1,212 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.16 + +# Default target executed when no arguments are given to make. +default_target: all + +.PHONY : default_target + +# Allow only one "make -f Makefile2" at a time, but pass parallelism. +.NOTPARALLEL: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /ccpi/repos/ufo-filters + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /ccpi/repos/ufo-filters + +#============================================================================= +# Targets provided globally by CMake. + +# Special rule for the target install/strip +install/strip: preinstall + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..." + /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake +.PHONY : install/strip + +# Special rule for the target install/strip +install/strip/fast: preinstall/fast + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..." + /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake +.PHONY : install/strip/fast + +# Special rule for the target install/local +install/local: preinstall + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..." + /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake +.PHONY : install/local + +# Special rule for the target install/local +install/local/fast: preinstall/fast + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..." + /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake +.PHONY : install/local/fast + +# Special rule for the target install +install: preinstall + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..." + /usr/bin/cmake -P cmake_install.cmake +.PHONY : install + +# Special rule for the target install +install/fast: preinstall/fast + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..." + /usr/bin/cmake -P cmake_install.cmake +.PHONY : install/fast + +# Special rule for the target list_install_components +list_install_components: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Available install components are: \"Unspecified\"" +.PHONY : list_install_components + +# Special rule for the target list_install_components +list_install_components/fast: list_install_components + +.PHONY : list_install_components/fast + +# Special rule for the target rebuild_cache +rebuild_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." + /usr/bin/cmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : rebuild_cache + +# Special rule for the target rebuild_cache +rebuild_cache/fast: rebuild_cache + +.PHONY : rebuild_cache/fast + +# Special rule for the target edit_cache +edit_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..." + /usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. +.PHONY : edit_cache + +# Special rule for the target edit_cache +edit_cache/fast: edit_cache + +.PHONY : edit_cache/fast + +# Special rule for the target test +test: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running tests..." + /usr/bin/ctest --force-new-ctest-process $(ARGS) +.PHONY : test + +# Special rule for the target test +test/fast: test + +.PHONY : test/fast + +# The main all target +all: cmake_check_build_system + cd /ccpi/repos/ufo-filters && $(CMAKE_COMMAND) -E cmake_progress_start /ccpi/repos/ufo-filters/CMakeFiles /ccpi/repos/ufo-filters/src/kernels/CMakeFiles/progress.marks + cd /ccpi/repos/ufo-filters && $(MAKE) -f CMakeFiles/Makefile2 src/kernels/all + $(CMAKE_COMMAND) -E cmake_progress_start /ccpi/repos/ufo-filters/CMakeFiles 0 +.PHONY : all + +# The main clean target +clean: + cd /ccpi/repos/ufo-filters && $(MAKE) -f CMakeFiles/Makefile2 src/kernels/clean +.PHONY : clean + +# The main clean target +clean/fast: clean + +.PHONY : clean/fast + +# Prepare targets for installation. +preinstall: all + cd /ccpi/repos/ufo-filters && $(MAKE) -f CMakeFiles/Makefile2 src/kernels/preinstall +.PHONY : preinstall + +# Prepare targets for installation. +preinstall/fast: + cd /ccpi/repos/ufo-filters && $(MAKE) -f CMakeFiles/Makefile2 src/kernels/preinstall +.PHONY : preinstall/fast + +# clear depends +depend: + cd /ccpi/repos/ufo-filters && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 +.PHONY : depend + +# Convenience name for target. +src/kernels/CMakeFiles/burst.dir/rule: + cd /ccpi/repos/ufo-filters && $(MAKE) -f CMakeFiles/Makefile2 src/kernels/CMakeFiles/burst.dir/rule +.PHONY : src/kernels/CMakeFiles/burst.dir/rule + +# Convenience name for target. +burst: src/kernels/CMakeFiles/burst.dir/rule + +.PHONY : burst + +# fast build rule for target. +burst/fast: + cd /ccpi/repos/ufo-filters && $(MAKE) -f src/kernels/CMakeFiles/burst.dir/build.make src/kernels/CMakeFiles/burst.dir/build +.PHONY : burst/fast + +# Help Target +help: + @echo "The following are some of the valid targets for this Makefile:" + @echo "... all (the default if no target is provided)" + @echo "... clean" + @echo "... depend" + @echo "... install/strip" + @echo "... install/local" + @echo "... install" + @echo "... list_install_components" + @echo "... rebuild_cache" + @echo "... edit_cache" + @echo "... test" + @echo "... burst" +.PHONY : help + + + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. +cmake_check_build_system: + cd /ccpi/repos/ufo-filters && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/src/kernels/backproject.cl b/src/kernels/backproject.cl index 8a81790..88467de 100644 --- a/src/kernels/backproject.cl +++ b/src/kernels/backproject.cl @@ -19,7 +19,7 @@ constant sampler_t volumeSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | - CLK_FILTER_LINEAR; + CLK_FILTER_NEAREST; kernel void backproject_nearest (global float *sinogram, @@ -86,10 +86,10 @@ backproject_tex (read_only image2d_t sinogram, #pragma unroll 4 #endif for(int proj = 0; proj < n_projections; proj++) { - float h = by * sin_lut[angle_offset + proj] + bx * cos_lut[angle_offset + proj] + axis_pos; + float h = -by * sin_lut[angle_offset + proj] + bx * cos_lut[angle_offset + proj] + axis_pos; sum += read_imagef (sinogram, volumeSampler, (float2)(h, proj + 0.5f)).x; } - slice[idy * get_global_size(0) + idx] = sum * M_PI_F / n_projections; + slice[idy * get_global_size(0) + idx] = sum; } diff --git a/src/kernels/center_kernel.cl b/src/kernels/center_kernel.cl new file mode 100644 index 0000000..ff9108b --- /dev/null +++ b/src/kernels/center_kernel.cl @@ -0,0 +1,478 @@ +#define rotate() pixel.x -= x_center.x; \ + pixel.y -= y_center; \ + pixel.x = pixel.x * cos_roll + pixel.y * sin_roll; \ + pixel.y = -pixel.x * sin_roll + pixel.y * cos_roll; \ + pixel.x += x_center.x; \ + pixel.y += y_center; + +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_1 ( +read_only image2d_t projection_0, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float sines, + const float cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + float x_center_current = mad((float) idz, x_center.y, x_center.x); + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines, mad(voxel.y, sines, x_center_current)); + pixel.y = mad(tmp_x, sines, mad(tmp_y, cosines, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_2 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float2 sines, + const float2 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + float x_center_current = mad((float) idz, x_center.y, x_center.x); + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center_current)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center_current)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_4 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, +read_only image2d_t projection_2, +read_only image2d_t projection_3, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float4 sines, + const float4 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + float x_center_current = mad((float) idz, x_center.y, x_center.x); + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center_current)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center_current)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center_current)); + pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp)); + rotate (); + result += read_imagef (projection_2, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center_current)); + pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp)); + rotate (); + result += read_imagef (projection_3, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_8 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, +read_only image2d_t projection_2, +read_only image2d_t projection_3, +read_only image2d_t projection_4, +read_only image2d_t projection_5, +read_only image2d_t projection_6, +read_only image2d_t projection_7, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float8 sines, + const float8 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + float x_center_current = mad((float) idz, x_center.y, x_center.x); + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center_current)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center_current)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center_current)); + pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp)); + rotate (); + result += read_imagef (projection_2, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center_current)); + pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp)); + rotate (); + result += read_imagef (projection_3, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s4, mad(voxel.y, sines.s4, x_center_current)); + pixel.y = mad(tmp_x, sines.s4, mad(tmp_y, cosines.s4, tmp)); + rotate (); + result += read_imagef (projection_4, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s5, mad(voxel.y, sines.s5, x_center_current)); + pixel.y = mad(tmp_x, sines.s5, mad(tmp_y, cosines.s5, tmp)); + rotate (); + result += read_imagef (projection_5, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s6, mad(voxel.y, sines.s6, x_center_current)); + pixel.y = mad(tmp_x, sines.s6, mad(tmp_y, cosines.s6, tmp)); + rotate (); + result += read_imagef (projection_6, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s7, mad(voxel.y, sines.s7, x_center_current)); + pixel.y = mad(tmp_x, sines.s7, mad(tmp_y, cosines.s7, tmp)); + rotate (); + result += read_imagef (projection_7, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_16 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, +read_only image2d_t projection_2, +read_only image2d_t projection_3, +read_only image2d_t projection_4, +read_only image2d_t projection_5, +read_only image2d_t projection_6, +read_only image2d_t projection_7, +read_only image2d_t projection_8, +read_only image2d_t projection_9, +read_only image2d_t projection_10, +read_only image2d_t projection_11, +read_only image2d_t projection_12, +read_only image2d_t projection_13, +read_only image2d_t projection_14, +read_only image2d_t projection_15, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float16 sines, + const float16 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + float x_center_current = mad((float) idz, x_center.y, x_center.x); + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center_current)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center_current)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center_current)); + pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp)); + rotate (); + result += read_imagef (projection_2, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center_current)); + pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp)); + rotate (); + result += read_imagef (projection_3, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s4, mad(voxel.y, sines.s4, x_center_current)); + pixel.y = mad(tmp_x, sines.s4, mad(tmp_y, cosines.s4, tmp)); + rotate (); + result += read_imagef (projection_4, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s5, mad(voxel.y, sines.s5, x_center_current)); + pixel.y = mad(tmp_x, sines.s5, mad(tmp_y, cosines.s5, tmp)); + rotate (); + result += read_imagef (projection_5, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s6, mad(voxel.y, sines.s6, x_center_current)); + pixel.y = mad(tmp_x, sines.s6, mad(tmp_y, cosines.s6, tmp)); + rotate (); + result += read_imagef (projection_6, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s7, mad(voxel.y, sines.s7, x_center_current)); + pixel.y = mad(tmp_x, sines.s7, mad(tmp_y, cosines.s7, tmp)); + rotate (); + result += read_imagef (projection_7, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s8, mad(voxel.y, sines.s8, x_center_current)); + pixel.y = mad(tmp_x, sines.s8, mad(tmp_y, cosines.s8, tmp)); + rotate (); + result += read_imagef (projection_8, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s9, mad(voxel.y, sines.s9, x_center_current)); + pixel.y = mad(tmp_x, sines.s9, mad(tmp_y, cosines.s9, tmp)); + rotate (); + result += read_imagef (projection_9, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sa, mad(voxel.y, sines.sa, x_center_current)); + pixel.y = mad(tmp_x, sines.sa, mad(tmp_y, cosines.sa, tmp)); + rotate (); + result += read_imagef (projection_10, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sb, mad(voxel.y, sines.sb, x_center_current)); + pixel.y = mad(tmp_x, sines.sb, mad(tmp_y, cosines.sb, tmp)); + rotate (); + result += read_imagef (projection_11, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sc, mad(voxel.y, sines.sc, x_center_current)); + pixel.y = mad(tmp_x, sines.sc, mad(tmp_y, cosines.sc, tmp)); + rotate (); + result += read_imagef (projection_12, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sd, mad(voxel.y, sines.sd, x_center_current)); + pixel.y = mad(tmp_x, sines.sd, mad(tmp_y, cosines.sd, tmp)); + rotate (); + result += read_imagef (projection_13, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.se, mad(voxel.y, sines.se, x_center_current)); + pixel.y = mad(tmp_x, sines.se, mad(tmp_y, cosines.se, tmp)); + rotate (); + result += read_imagef (projection_14, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sf, mad(voxel.y, sines.sf, x_center_current)); + pixel.y = mad(tmp_x, sines.sf, mad(tmp_y, cosines.sf, tmp)); + rotate (); + result += read_imagef (projection_15, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} + diff --git a/src/kernels/cmake_install.cmake b/src/kernels/cmake_install.cmake new file mode 100644 index 0000000..c3a2e2f --- /dev/null +++ b/src/kernels/cmake_install.cmake @@ -0,0 +1,684 @@ +# Install script for directory: /ccpi/repos/ufo-filters/src/kernels + +# Set the install prefix +if(NOT DEFINED CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX "/usr/local") +endif() +string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. +if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + if(BUILD_TYPE) + string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + else() + set(CMAKE_INSTALL_CONFIG_NAME "") + endif() + message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +endif() + +# Set the component getting installed. +if(NOT CMAKE_INSTALL_COMPONENT) + if(COMPONENT) + message(STATUS "Install component: \"${COMPONENT}\"") + set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + else() + set(CMAKE_INSTALL_COMPONENT) + endif() +endif() + +# Install shared libraries without execute permission? +if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) + set(CMAKE_INSTALL_SO_NO_EXE "1") +endif() + +# Is this installation the result of a crosscompile? +if(NOT DEFINED CMAKE_CROSSCOMPILING) + set(CMAKE_CROSSCOMPILING "FALSE") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/z_kernel.cl;/usr/share/ufo/center_kernel.cl;/usr/share/ufo/lamino_kernel.cl;/usr/share/ufo/roll_kernel.cl;/usr/share/ufo/general_bp_definitions.in;/usr/share/ufo/general_bp_header_scalar.in;/usr/share/ufo/general_bp_header_vector.in;/usr/share/ufo/general_bp_body.in") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES + "/ccpi/repos/ufo-filters/src/kernels/z_kernel.cl" + "/ccpi/repos/ufo-filters/src/kernels/center_kernel.cl" + "/ccpi/repos/ufo-filters/src/kernels/lamino_kernel.cl" + "/ccpi/repos/ufo-filters/src/kernels/roll_kernel.cl" + "/ccpi/repos/ufo-filters/src/kernels/templates/general_bp_definitions.in" + "/ccpi/repos/ufo-filters/src/kernels/templates/general_bp_header_scalar.in" + "/ccpi/repos/ufo-filters/src/kernels/templates/general_bp_header_vector.in" + "/ccpi/repos/ufo-filters/src/kernels/templates/general_bp_body.in" + ) +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/arithmetics.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/arithmetics.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/backproject.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/backproject.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/bin.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/bin.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/binarize.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/binarize.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/center_kernel.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/center_kernel.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/clip.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/clip.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/complex.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/complex.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/conebeam.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/conebeam.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/correlate.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/correlate.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/cumsum.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/cumsum.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/cut-sinogram.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/cut-sinogram.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/cut.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/cut.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/denoise.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/denoise.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/dfi.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/dfi.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/edge.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/edge.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/estimate-noise.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/estimate-noise.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/ffc.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/ffc.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/fft.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/fft.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/fftmult.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/fftmult.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/filter.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/filter.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/flip.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/flip.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/forwardproject.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/forwardproject.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/gaussian.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/gaussian.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/gradient.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/gradient.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/histthreshold.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/histthreshold.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/interpolator.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/interpolator.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/lamino_kernel.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/lamino_kernel.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/mask.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/mask.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/median.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/median.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/metaballs.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/metaballs.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/morphology.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/morphology.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/nlm.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/nlm.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/opencl-reduce.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/opencl-reduce.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/opencl.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/opencl.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/ordfilt.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/ordfilt.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/pad.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/pad.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/phase-retrieval.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/phase-retrieval.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/piv.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/piv.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/polar.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/polar.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/reductor.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/reductor.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/rescale.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/rescale.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/rm-outliers.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/rm-outliers.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/roll_kernel.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/roll_kernel.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/rotate.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/rotate.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/segment.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/segment.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/split.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/split.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/stacked-backproject.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/stacked-backproject.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/stacked-forwardproject.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/stacked-forwardproject.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/swap-quadrants.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/swap-quadrants.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/transpose.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/transpose.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/z_kernel.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/z_kernel.cl") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + list(APPEND CMAKE_ABSOLUTE_DESTINATION_FILES + "/usr/share/ufo/zeropad.cl") + if(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION) + message(WARNING "ABSOLUTE path INSTALL DESTINATION : ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() + if(CMAKE_ERROR_ON_ABSOLUTE_INSTALL_DESTINATION) + message(FATAL_ERROR "ABSOLUTE path INSTALL DESTINATION forbidden (by caller): ${CMAKE_ABSOLUTE_DESTINATION_FILES}") + endif() +file(INSTALL DESTINATION "/usr/share/ufo" TYPE FILE FILES "/ccpi/repos/ufo-filters/src/kernels/zeropad.cl") +endif() + diff --git a/src/kernels/forwardproject.cl b/src/kernels/forwardproject.cl index 18a8019..89faabd 100644 --- a/src/kernels/forwardproject.cl +++ b/src/kernels/forwardproject.cl @@ -19,7 +19,7 @@ constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | - CLK_FILTER_LINEAR; + CLK_FILTER_NEAREST; kernel void forwardproject(read_only image2d_t slice, @@ -37,10 +37,10 @@ forwardproject(read_only image2d_t slice, /* positive/negative distance from detector center */ const float d = idx - axis_pos + 0.5f; /* length of the cut through the circle */ - const float l = sqrt(4.0f*r*r - 4.0f*d*d); + const float l = sqrt(8.0f*r*r - 4.0f*d*d); /* vector in detector direction */ - float2 D = (float2) (cos(angle), sin(angle)); + float2 D = (float2) (cos(angle), -sin(angle)); D = normalize(D); /* vector perpendicular to the detector */ diff --git a/src/kernels/lamino_kernel.cl b/src/kernels/lamino_kernel.cl new file mode 100644 index 0000000..e573910 --- /dev/null +++ b/src/kernels/lamino_kernel.cl @@ -0,0 +1,478 @@ +#define rotate() pixel.x -= x_center.x; \ + pixel.y -= y_center; \ + pixel.x = pixel.x * cos_roll + pixel.y * sin_roll; \ + pixel.y = -pixel.x * sin_roll + pixel.y * cos_roll; \ + pixel.x += x_center.x; \ + pixel.y += y_center; + +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_1 ( +read_only image2d_t projection_0, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float sines, + const float cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + sin_lamino = sincos (mad((float) idz, lamino_region.y, lamino_region.x), &cos_lamino); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines, mad(voxel.y, sines, x_center.x)); + pixel.y = mad(tmp_x, sines, mad(tmp_y, cosines, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_2 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float2 sines, + const float2 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + sin_lamino = sincos (mad((float) idz, lamino_region.y, lamino_region.x), &cos_lamino); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_4 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, +read_only image2d_t projection_2, +read_only image2d_t projection_3, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float4 sines, + const float4 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + sin_lamino = sincos (mad((float) idz, lamino_region.y, lamino_region.x), &cos_lamino); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center.x)); + pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp)); + rotate (); + result += read_imagef (projection_2, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center.x)); + pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp)); + rotate (); + result += read_imagef (projection_3, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_8 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, +read_only image2d_t projection_2, +read_only image2d_t projection_3, +read_only image2d_t projection_4, +read_only image2d_t projection_5, +read_only image2d_t projection_6, +read_only image2d_t projection_7, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float8 sines, + const float8 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + sin_lamino = sincos (mad((float) idz, lamino_region.y, lamino_region.x), &cos_lamino); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center.x)); + pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp)); + rotate (); + result += read_imagef (projection_2, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center.x)); + pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp)); + rotate (); + result += read_imagef (projection_3, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s4, mad(voxel.y, sines.s4, x_center.x)); + pixel.y = mad(tmp_x, sines.s4, mad(tmp_y, cosines.s4, tmp)); + rotate (); + result += read_imagef (projection_4, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s5, mad(voxel.y, sines.s5, x_center.x)); + pixel.y = mad(tmp_x, sines.s5, mad(tmp_y, cosines.s5, tmp)); + rotate (); + result += read_imagef (projection_5, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s6, mad(voxel.y, sines.s6, x_center.x)); + pixel.y = mad(tmp_x, sines.s6, mad(tmp_y, cosines.s6, tmp)); + rotate (); + result += read_imagef (projection_6, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s7, mad(voxel.y, sines.s7, x_center.x)); + pixel.y = mad(tmp_x, sines.s7, mad(tmp_y, cosines.s7, tmp)); + rotate (); + result += read_imagef (projection_7, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_16 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, +read_only image2d_t projection_2, +read_only image2d_t projection_3, +read_only image2d_t projection_4, +read_only image2d_t projection_5, +read_only image2d_t projection_6, +read_only image2d_t projection_7, +read_only image2d_t projection_8, +read_only image2d_t projection_9, +read_only image2d_t projection_10, +read_only image2d_t projection_11, +read_only image2d_t projection_12, +read_only image2d_t projection_13, +read_only image2d_t projection_14, +read_only image2d_t projection_15, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float16 sines, + const float16 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + sin_lamino = sincos (mad((float) idz, lamino_region.y, lamino_region.x), &cos_lamino); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center.x)); + pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp)); + rotate (); + result += read_imagef (projection_2, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center.x)); + pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp)); + rotate (); + result += read_imagef (projection_3, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s4, mad(voxel.y, sines.s4, x_center.x)); + pixel.y = mad(tmp_x, sines.s4, mad(tmp_y, cosines.s4, tmp)); + rotate (); + result += read_imagef (projection_4, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s5, mad(voxel.y, sines.s5, x_center.x)); + pixel.y = mad(tmp_x, sines.s5, mad(tmp_y, cosines.s5, tmp)); + rotate (); + result += read_imagef (projection_5, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s6, mad(voxel.y, sines.s6, x_center.x)); + pixel.y = mad(tmp_x, sines.s6, mad(tmp_y, cosines.s6, tmp)); + rotate (); + result += read_imagef (projection_6, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s7, mad(voxel.y, sines.s7, x_center.x)); + pixel.y = mad(tmp_x, sines.s7, mad(tmp_y, cosines.s7, tmp)); + rotate (); + result += read_imagef (projection_7, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s8, mad(voxel.y, sines.s8, x_center.x)); + pixel.y = mad(tmp_x, sines.s8, mad(tmp_y, cosines.s8, tmp)); + rotate (); + result += read_imagef (projection_8, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s9, mad(voxel.y, sines.s9, x_center.x)); + pixel.y = mad(tmp_x, sines.s9, mad(tmp_y, cosines.s9, tmp)); + rotate (); + result += read_imagef (projection_9, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sa, mad(voxel.y, sines.sa, x_center.x)); + pixel.y = mad(tmp_x, sines.sa, mad(tmp_y, cosines.sa, tmp)); + rotate (); + result += read_imagef (projection_10, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sb, mad(voxel.y, sines.sb, x_center.x)); + pixel.y = mad(tmp_x, sines.sb, mad(tmp_y, cosines.sb, tmp)); + rotate (); + result += read_imagef (projection_11, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sc, mad(voxel.y, sines.sc, x_center.x)); + pixel.y = mad(tmp_x, sines.sc, mad(tmp_y, cosines.sc, tmp)); + rotate (); + result += read_imagef (projection_12, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sd, mad(voxel.y, sines.sd, x_center.x)); + pixel.y = mad(tmp_x, sines.sd, mad(tmp_y, cosines.sd, tmp)); + rotate (); + result += read_imagef (projection_13, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.se, mad(voxel.y, sines.se, x_center.x)); + pixel.y = mad(tmp_x, sines.se, mad(tmp_y, cosines.se, tmp)); + rotate (); + result += read_imagef (projection_14, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sf, mad(voxel.y, sines.sf, x_center.x)); + pixel.y = mad(tmp_x, sines.sf, mad(tmp_y, cosines.sf, tmp)); + rotate (); + result += read_imagef (projection_15, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} + diff --git a/src/kernels/roll_kernel.cl b/src/kernels/roll_kernel.cl new file mode 100644 index 0000000..129e14f --- /dev/null +++ b/src/kernels/roll_kernel.cl @@ -0,0 +1,478 @@ +#define rotate() pixel.x -= x_center.x; \ + pixel.y -= y_center; \ + pixel.x = pixel.x * cos_roll + pixel.y * sin_roll; \ + pixel.y = -pixel.x * sin_roll + pixel.y * cos_roll; \ + pixel.x += x_center.x; \ + pixel.y += y_center; + +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_1 ( +read_only image2d_t projection_0, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float sines, + const float cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + sin_roll = sincos (mad((float) idz, -roll_region.y, -roll_region.x), &cos_roll); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines, mad(voxel.y, sines, x_center.x)); + pixel.y = mad(tmp_x, sines, mad(tmp_y, cosines, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_2 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float2 sines, + const float2 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + sin_roll = sincos (mad((float) idz, -roll_region.y, -roll_region.x), &cos_roll); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_4 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, +read_only image2d_t projection_2, +read_only image2d_t projection_3, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float4 sines, + const float4 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + sin_roll = sincos (mad((float) idz, -roll_region.y, -roll_region.x), &cos_roll); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center.x)); + pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp)); + rotate (); + result += read_imagef (projection_2, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center.x)); + pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp)); + rotate (); + result += read_imagef (projection_3, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_8 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, +read_only image2d_t projection_2, +read_only image2d_t projection_3, +read_only image2d_t projection_4, +read_only image2d_t projection_5, +read_only image2d_t projection_6, +read_only image2d_t projection_7, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float8 sines, + const float8 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + sin_roll = sincos (mad((float) idz, -roll_region.y, -roll_region.x), &cos_roll); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center.x)); + pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp)); + rotate (); + result += read_imagef (projection_2, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center.x)); + pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp)); + rotate (); + result += read_imagef (projection_3, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s4, mad(voxel.y, sines.s4, x_center.x)); + pixel.y = mad(tmp_x, sines.s4, mad(tmp_y, cosines.s4, tmp)); + rotate (); + result += read_imagef (projection_4, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s5, mad(voxel.y, sines.s5, x_center.x)); + pixel.y = mad(tmp_x, sines.s5, mad(tmp_y, cosines.s5, tmp)); + rotate (); + result += read_imagef (projection_5, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s6, mad(voxel.y, sines.s6, x_center.x)); + pixel.y = mad(tmp_x, sines.s6, mad(tmp_y, cosines.s6, tmp)); + rotate (); + result += read_imagef (projection_6, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s7, mad(voxel.y, sines.s7, x_center.x)); + pixel.y = mad(tmp_x, sines.s7, mad(tmp_y, cosines.s7, tmp)); + rotate (); + result += read_imagef (projection_7, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_16 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, +read_only image2d_t projection_2, +read_only image2d_t projection_3, +read_only image2d_t projection_4, +read_only image2d_t projection_5, +read_only image2d_t projection_6, +read_only image2d_t projection_7, +read_only image2d_t projection_8, +read_only image2d_t projection_9, +read_only image2d_t projection_10, +read_only image2d_t projection_11, +read_only image2d_t projection_12, +read_only image2d_t projection_13, +read_only image2d_t projection_14, +read_only image2d_t projection_15, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float16 sines, + const float16 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + sin_roll = sincos (mad((float) idz, -roll_region.y, -roll_region.x), &cos_roll); + tmp = mad(z_region.x, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center.x)); + pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp)); + rotate (); + result += read_imagef (projection_2, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center.x)); + pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp)); + rotate (); + result += read_imagef (projection_3, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s4, mad(voxel.y, sines.s4, x_center.x)); + pixel.y = mad(tmp_x, sines.s4, mad(tmp_y, cosines.s4, tmp)); + rotate (); + result += read_imagef (projection_4, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s5, mad(voxel.y, sines.s5, x_center.x)); + pixel.y = mad(tmp_x, sines.s5, mad(tmp_y, cosines.s5, tmp)); + rotate (); + result += read_imagef (projection_5, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s6, mad(voxel.y, sines.s6, x_center.x)); + pixel.y = mad(tmp_x, sines.s6, mad(tmp_y, cosines.s6, tmp)); + rotate (); + result += read_imagef (projection_6, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s7, mad(voxel.y, sines.s7, x_center.x)); + pixel.y = mad(tmp_x, sines.s7, mad(tmp_y, cosines.s7, tmp)); + rotate (); + result += read_imagef (projection_7, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s8, mad(voxel.y, sines.s8, x_center.x)); + pixel.y = mad(tmp_x, sines.s8, mad(tmp_y, cosines.s8, tmp)); + rotate (); + result += read_imagef (projection_8, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s9, mad(voxel.y, sines.s9, x_center.x)); + pixel.y = mad(tmp_x, sines.s9, mad(tmp_y, cosines.s9, tmp)); + rotate (); + result += read_imagef (projection_9, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sa, mad(voxel.y, sines.sa, x_center.x)); + pixel.y = mad(tmp_x, sines.sa, mad(tmp_y, cosines.sa, tmp)); + rotate (); + result += read_imagef (projection_10, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sb, mad(voxel.y, sines.sb, x_center.x)); + pixel.y = mad(tmp_x, sines.sb, mad(tmp_y, cosines.sb, tmp)); + rotate (); + result += read_imagef (projection_11, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sc, mad(voxel.y, sines.sc, x_center.x)); + pixel.y = mad(tmp_x, sines.sc, mad(tmp_y, cosines.sc, tmp)); + rotate (); + result += read_imagef (projection_12, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sd, mad(voxel.y, sines.sd, x_center.x)); + pixel.y = mad(tmp_x, sines.sd, mad(tmp_y, cosines.sd, tmp)); + rotate (); + result += read_imagef (projection_13, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.se, mad(voxel.y, sines.se, x_center.x)); + pixel.y = mad(tmp_x, sines.se, mad(tmp_y, cosines.se, tmp)); + rotate (); + result += read_imagef (projection_14, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sf, mad(voxel.y, sines.sf, x_center.x)); + pixel.y = mad(tmp_x, sines.sf, mad(tmp_y, cosines.sf, tmp)); + rotate (); + result += read_imagef (projection_15, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} + diff --git a/src/kernels/stacked-backproject.cl b/src/kernels/stacked-backproject.cl index d6fff5f..c9938cb 100644 --- a/src/kernels/stacked-backproject.cl +++ b/src/kernels/stacked-backproject.cl @@ -23,7 +23,7 @@ constant sampler_t volumeSampler_single = CLK_NORMALIZED_COORDS_FALSE | constant sampler_t volumeSampler_half = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | - CLK_FILTER_NEAREST; + CLK_FILTER_LINEAR; constant sampler_t volumeSampler_int8 = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | @@ -47,6 +47,34 @@ interleave_single ( global float *sinogram, write_imagef(interleaved_sinograms, (int4)(idx, idy, idz, 0),(float4)(x,y,0.0f,0.0f)); } +/*kernel void +texture_single (read_only image2d_array_t sinogram, + global float2 *reconstructed_buffer, + constant float *sin_lut, + constant float *cos_lut, + const unsigned int x_offset, + const unsigned int y_offset, + const unsigned int angle_offset, + const unsigned int n_projections, + const float axis_pos, + unsigned long size) +{ + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + const float bx = idx - axis_pos + x_offset + 0.5f; + const float by = idy - axis_pos + y_offset + 0.5f; + float2 sum = {0.0f, 0.0f}; + + for(int proj = 0; proj < n_projections; proj++) { + float h = -by * sin_lut[angle_offset + proj] + bx * cos_lut[angle_offset + proj] + axis_pos; + sum += read_imagef (sinogram, volumeSampler_single, (float4)(h, proj + 0.5f,idz,0.0f)).xy; + } + + reconstructed_buffer[idx + idy*size + idz*size*size] = sum; +}*/ + + kernel void texture_single ( read_only image2d_array_t sinogram, @@ -73,12 +101,12 @@ texture_single ( int global_sizex = get_global_size(0); int global_sizey = get_global_size(1); - /* Computing sequential numbers of 4x4 square, quadrant, and pixel within quadrant */ + // Computing sequential numbers of 4x4 square, quadrant, and pixel within quadrant int square = local_idy%4; int quadrant = local_idx/4; int pixel = local_idx%4; - /* Computing projection and pixel offsets */ + // Computing projection and pixel offsets int projection_index = local_idy/4; int2 remapped_index_local = {(4*square + 2*(quadrant%2) + (pixel%2)), @@ -94,6 +122,27 @@ texture_single ( __local float2 shared_mem[64][4]; __local float2 reconstructed_cache[16][16]; +/*#ifdef DEVICE_TESLA_K20XM +#pragma unroll 4 +#endif +#ifdef DEVICE_TESLA_P100_PCIE_16GB +#pragma unroll 2 +#endif +#ifdef DEVICE_GEFORCE_GTX_TITAN_BLACK +#pragma unroll 8 +#endif +#ifdef DEVICE_GEFORCE_GTX_TITAN +#pragma unroll 14 +#endif +#ifdef DEVICE_GEFORCE_GTX_1080_TI +#pragma unroll 10 +#endif +#ifdef DEVICE_QUADRO_M6000 +#pragma unroll 2 +#endif +#ifdef DEVICE_GFX1010 +#pragma unroll 4 +#endif*/ for(int proj = projection_index; proj < n_projections; proj+=4) { float sine_value = sin_lut[angle_offset + proj]; @@ -106,7 +155,7 @@ texture_single ( int2 remapped_index = {(local_idx%4), (4*local_idy + (local_idx/4))}; for(int q=0; q<4;q+=1){ - /* Moving partial sums to shared memory */ + // Moving partial sums to shared memory shared_mem[(local_sizex*remapped_index_local.y + remapped_index_local.x)][projection_index] = sum[q]; barrier(CLK_LOCAL_MEM_FENCE); // syncthreads @@ -124,7 +173,7 @@ texture_single ( barrier(CLK_LOCAL_MEM_FENCE); // syncthreads } - reconstructed_buffer[global_idx + global_idy*size + idz*size*size] = reconstructed_cache[local_idy][local_idx] * M_PI_F / n_projections; + reconstructed_buffer[global_idx + global_idy*size + idz*size*size] = reconstructed_cache[local_idy][local_idx]; } kernel void @@ -163,6 +212,54 @@ interleave_half (global float *sinogram, write_imagef(interleaved_sinograms, (int4)(idx, idy, idz, 0),(float4)(b)); } +/*kernel void +texture_half (read_only image2d_array_t sinogram, + global float4 *reconstructed_buffer, + constant float *sin_lut, + constant float *cos_lut, + const unsigned int x_offset, + const unsigned int y_offset, + const unsigned int angle_offset, + const unsigned int n_projections, + const float axis_pos, + unsigned long size) +{ + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + const float bx = idx - axis_pos + x_offset + 0.5f; + const float by = idy - axis_pos + y_offset + 0.5f; + float4 sum = {0.0f, 0.0f, 0.0f, 0.0f}; + +#ifdef DEVICE_TESLA_K20XM +#pragma unroll 4 +#endif +#ifdef DEVICE_TESLA_P100_PCIE_16GB +#pragma unroll 2 +#endif +#ifdef DEVICE_GEFORCE_GTX_TITAN_BLACK +#pragma unroll 8 +#endif +#ifdef DEVICE_GEFORCE_GTX_TITAN +#pragma unroll 14 +#endif +#ifdef DEVICE_GEFORCE_GTX_1080_TI +#pragma unroll 10 +#endif +#ifdef DEVICE_QUADRO_M6000 +#pragma unroll 2 +#endif +#ifdef DEVICE_GFX1010 +#pragma unroll 4 +#endif + for(int proj = 0; proj < n_projections; proj++) { + float h = -by * sin_lut[angle_offset + proj] + bx * cos_lut[angle_offset + proj] + axis_pos; + sum += read_imagef (sinogram, volumeSampler_half, (float4)(h, proj + 0.5f,idz,0.0f)); + } + + reconstructed_buffer[idx + idy*size + idz*size*size] = sum; +}*/ + kernel void texture_half ( read_only image2d_array_t sinogram, @@ -189,12 +286,12 @@ texture_half ( int global_sizex = get_global_size(0); int global_sizey = get_global_size(1); - /* Computing sequential numbers of 4x4 square, quadrant, and pixel within quadrant */ + // Computing sequential numbers of 4x4 square, quadrant, and pixel within quadrant int square = local_idy%4; int quadrant = local_idx/4; int pixel = local_idx%4; - /* Computing projection and pixel offsets */ + // Computing projection and pixel offsets int projection_index = local_idy/4; int2 remapped_index_local = {(4*square + 2*(quadrant%2) + (pixel%2)),(2* (quadrant/2) + (pixel/2))}; int2 remapped_index_global = {(get_group_id(0)*get_local_size(0)+remapped_index_local.x), @@ -206,6 +303,27 @@ texture_half ( __local float4 shared_mem[64][4]; __local float4 reconstructed_cache[16][16]; +#ifdef DEVICE_TESLA_K20XM +#pragma unroll 4 +#endif +#ifdef DEVICE_TESLA_P100_PCIE_16GB +#pragma unroll 2 +#endif +#ifdef DEVICE_GEFORCE_GTX_TITAN_BLACK +#pragma unroll 8 +#endif +#ifdef DEVICE_GEFORCE_GTX_TITAN +#pragma unroll 14 +#endif +#ifdef DEVICE_GEFORCE_GTX_1080_TI +#pragma unroll 10 +#endif +#ifdef DEVICE_QUADRO_M6000 +#pragma unroll 2 +#endif +#ifdef DEVICE_GFX1010 +#pragma unroll 4 +#endif for(int proj = projection_index; proj < n_projections; proj+=4) { float sine_value = sin_lut[angle_offset + proj]; @@ -218,7 +336,7 @@ texture_half ( int2 remapped_index = {(local_idx%4), (4*local_idy + (local_idx/4))}; for(int q=0; q<4;q+=1){ - /* Moving partial sums to shared memory */ + // Moving partial sums to shared memory shared_mem[(local_sizex*remapped_index_local.y + remapped_index_local.x)][projection_index] = sum[q]; barrier(CLK_LOCAL_MEM_FENCE); // syncthreads @@ -235,7 +353,7 @@ texture_half ( } barrier(CLK_LOCAL_MEM_FENCE); // syncthreads } - reconstructed_buffer[global_idx + global_idy*size + idz*size*size] = reconstructed_cache[local_idy][local_idx] * M_PI_F / n_projections; + reconstructed_buffer[global_idx + global_idy*size + idz*size*size] = reconstructed_cache[local_idy][local_idx]; } kernel void @@ -258,6 +376,11 @@ uninterleave_half (global float4 *reconstructed_buffer, output[idx + idy*sizex + (output_offset+3)*sizex*sizey] = b.w; } +union converter { + uint2 storage; + uchar8 a; +}; + kernel void interleave_uint (global float *sinogram, write_only image2d_array_t interleaved_sinograms, @@ -270,22 +393,85 @@ interleave_uint (global float *sinogram, const int sizex = get_global_size(0); const int sizey = get_global_size(1); - int sinogram_offset = idz*4; + int sinogram_offset = idz*8; const float scale = 255.0f / (max - min); - uint4 b = {(sinogram[idx + idy * sizex + (sinogram_offset) * sizex * sizey] - min)*scale, - (sinogram[idx + idy * sizex + (sinogram_offset+1) * sizex * sizey] - min)*scale, - (sinogram[idx + idy * sizex + (sinogram_offset+2) * sizex * sizey] - min)*scale, - (sinogram[idx + idy * sizex + (sinogram_offset+3) * sizex * sizey] - min)*scale}; - - write_imageui(interleaved_sinograms, (int4)(idx, idy, idz, 0),(uint4)(b)); + union converter il; + il.a.s0 = (sinogram[idx + idy * sizex + (sinogram_offset) * sizex * sizey] - min)*scale; + il.a.s1 = (sinogram[idx + idy * sizex + (sinogram_offset+1) * sizex * sizey] - min)*scale; + il.a.s2 = (sinogram[idx + idy * sizex + (sinogram_offset+2) * sizex * sizey] - min)*scale; + il.a.s3 = (sinogram[idx + idy * sizex + (sinogram_offset+3) * sizex * sizey] - min)*scale; + il.a.s4 = (sinogram[idx + idy * sizex + (sinogram_offset+4) * sizex * sizey] - min)*scale; + il.a.s5 = (sinogram[idx + idy * sizex + (sinogram_offset+5) * sizex * sizey] - min)*scale; + il.a.s6 = (sinogram[idx + idy * sizex + (sinogram_offset+6) * sizex * sizey] - min)*scale; + il.a.s7 = (sinogram[idx + idy * sizex + (sinogram_offset+7) * sizex * sizey] - min)*scale; + + write_imageui(interleaved_sinograms, (int4)(idx, idy, idz, 0),(uint4)((uint)il.storage.x,(uint)il.storage.y,0,0)); } +/*kernel void +texture_uint (read_only image2d_array_t sinogram, + global uint8 *reconstructed_buffer, + constant float *sin_lut, + constant float *cos_lut, + const unsigned int x_offset, + const unsigned int y_offset, + const unsigned int angle_offset, + const unsigned int n_projections, + const float axis_pos, + unsigned long size) +{ + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + const float bx = idx - axis_pos + x_offset + 0.5f; + const float by = idy - axis_pos + y_offset + 0.5f; + uint8 sum = {0,0,0,0,0,0,0,0}; + +#ifdef DEVICE_TESLA_K20XM +#pragma unroll 4 +#endif +#ifdef DEVICE_TESLA_P100_PCIE_16GB +#pragma unroll 2 +#endif +#ifdef DEVICE_GEFORCE_GTX_TITAN_BLACK +#pragma unroll 8 +#endif +#ifdef DEVICE_GEFORCE_GTX_TITAN +#pragma unroll 14 +#endif +#ifdef DEVICE_GEFORCE_GTX_1080_TI +#pragma unroll 10 +#endif +#ifdef DEVICE_QUADRO_M6000 +#pragma unroll 2 +#endif +#ifdef DEVICE_GFX1010 +#pragma unroll 4 +#endif + union converter tex; + for(int proj = 0; proj < n_projections; proj++) { + float h = -by * sin_lut[angle_offset + proj] + bx * cos_lut[angle_offset + proj] + axis_pos; + tex.storage = read_imageui (sinogram, volumeSampler_int8, (float4)(h, proj + 0.5f,idz,0.0f)).xy; + + sum.s0 += (uint)tex.a.s0; + sum.s1 += (uint)tex.a.s1; + sum.s2 += (uint)tex.a.s2; + sum.s3 += (uint)tex.a.s3; + sum.s4 += (uint)tex.a.s4; + sum.s5 += (uint)tex.a.s5; + sum.s6 += (uint)tex.a.s6; + sum.s7 += (uint)tex.a.s7; + } + + reconstructed_buffer[idx + idy*size + idz*size*size] = sum; +}*/ + kernel void texture_uint ( read_only image2d_array_t sinogram, - global uint4 *reconstructed_buffer, + global uint8 *reconstructed_buffer, constant float *sin_lut, constant float *cos_lut, const unsigned int x_offset, @@ -308,12 +494,12 @@ texture_uint ( int global_sizex = get_global_size(0); int global_sizey = get_global_size(1); - /* Computing sequential numbers of 4x4 square, quadrant, and pixel within quadrant */ + // Computing sequential numbers of 4x4 square, quadrant, and pixel within quadrant int square = local_idy%4; int quadrant = local_idx/4; int pixel = local_idx%4; - /* Computing projection and pixel offsets */ + // Computing projection and pixel offsets int projection_index = local_idy/4; int2 remapped_index_local = {(4*square + 2*(quadrant%2) + (pixel%2)),(2* (quadrant/2) + (pixel/2))}; int2 remapped_index_global = {(get_group_id(0)*get_local_size(0)+remapped_index_local.x), @@ -321,23 +507,56 @@ texture_uint ( float2 pixel_coord = {(remapped_index_global.x-axis_pos+x_offset+0.5f), (remapped_index_global.y-axis_pos+y_offset+0.5f)}; //bx and by - uint4 sum[4] = {0,0,0,0}; - __local uint4 shared_mem[64][4]; - __local uint4 reconstructed_cache[16][16]; + uint8 sum[4] = {0,0,0,0}; + __local uint8 shared_mem[64][4]; + __local uint8 reconstructed_cache[16][16]; + + union converter tex; + +#ifdef DEVICE_TESLA_K20XM +#pragma unroll 4 +#endif +#ifdef DEVICE_TESLA_P100_PCIE_16GB +#pragma unroll 2 +#endif +#ifdef DEVICE_GEFORCE_GTX_TITAN_BLACK +#pragma unroll 8 +#endif +#ifdef DEVICE_GEFORCE_GTX_TITAN +#pragma unroll 14 +#endif +#ifdef DEVICE_GEFORCE_GTX_1080_TI +#pragma unroll 10 +#endif +#ifdef DEVICE_QUADRO_M6000 +#pragma unroll 2 +#endif +#ifdef DEVICE_GFX1010 +#pragma unroll 4 +#endif for(int proj = projection_index; proj < n_projections; proj+=4) { float sine_value = sin_lut[angle_offset + proj]; float h = pixel_coord.x * cos_lut[angle_offset + proj] - pixel_coord.y * sin_lut[angle_offset + proj] + axis_pos; for(int q=0; q<4; q+=1){ - sum[q] += read_imageui(sinogram, volumeSampler_int8, (float4)(h-4*q*sine_value, proj + 0.5f,idz, 0.0)); + tex.storage = read_imageui(sinogram, volumeSampler_int8, (float4)(h-4*q*sine_value, proj + 0.5f,idz, 0.0)).xy; + + sum[q].s0 += (uint)tex.a.s0; + sum[q].s1 += (uint)tex.a.s1; + sum[q].s2 += (uint)tex.a.s2; + sum[q].s3 += (uint)tex.a.s3; + sum[q].s4 += (uint)tex.a.s4; + sum[q].s5 += (uint)tex.a.s5; + sum[q].s6 += (uint)tex.a.s6; + sum[q].s7 += (uint)tex.a.s7; + } } - int2 remapped_index = {(local_idx%4), (4*local_idy + (local_idx/4))}; for(int q=0; q<4;q+=1){ - /* Moving partial sums to shared memory */ + // Moving partial sums to shared memory shared_mem[(local_sizex*remapped_index_local.y + remapped_index_local.x)][projection_index] = sum[q]; barrier(CLK_LOCAL_MEM_FENCE); // syncthreads @@ -359,7 +578,7 @@ texture_uint ( } kernel void -uninterleave_uint (global uint4 *reconstructed_buffer, +uninterleave_uint (global uint8 *reconstructed_buffer, global float *output, const float min, const float max, @@ -371,11 +590,15 @@ uninterleave_uint (global uint4 *reconstructed_buffer, const int sizex = get_global_size(0); const int sizey = get_global_size(1); - int output_offset = idz*4; + int output_offset = idz*8; float scale = (max-min)/255.0f; - output[idx + idy*sizex + (output_offset)*sizex*sizey] = ((reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].x)*scale+min)* M_PI_F / n_projections; - output[idx + idy*sizex + (output_offset+1)*sizex*sizey] = ((reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].y)*scale+min)* M_PI_F / n_projections; - output[idx + idy*sizex + (output_offset+2)*sizex*sizey] = ((reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].z)*scale+min)* M_PI_F / n_projections; - output[idx + idy*sizex + (output_offset+3)*sizex*sizey] = ((reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].w)*scale+min)* M_PI_F / n_projections; -}
\ No newline at end of file + output[idx + idy*sizex + (output_offset)*sizex*sizey] = ((reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s0)*scale+min) ; + output[idx + idy*sizex + (output_offset+1)*sizex*sizey] = ((reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s1)*scale+min) ; + output[idx + idy*sizex + (output_offset+2)*sizex*sizey] = ((reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s2)*scale+min) ; + output[idx + idy*sizex + (output_offset+3)*sizex*sizey] = ((reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s3)*scale+min) ; + output[idx + idy*sizex + (output_offset+4)*sizex*sizey] = ((reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s4)*scale+min) ; + output[idx + idy*sizex + (output_offset+5)*sizex*sizey] = ((reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s5)*scale+min) ; + output[idx + idy*sizex + (output_offset+6)*sizex*sizey] = ((reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s6)*scale+min) ; + output[idx + idy*sizex + (output_offset+7)*sizex*sizey] = ((reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s7)*scale+min) ; +} diff --git a/src/kernels/stacked-forwardproject.cl b/src/kernels/stacked-forwardproject.cl new file mode 100644 index 0000000..c37eb53 --- /dev/null +++ b/src/kernels/stacked-forwardproject.cl @@ -0,0 +1,274 @@ +/* + * Copyright (C) 2011-2013 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + + constant sampler_t volumeSampler = CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP | + CLK_FILTER_NEAREST; + + kernel void + interleave_single ( global float *slices, + write_only image2d_array_t interleaved_slices) + { + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + const int sizex = get_global_size(0); + const int sizey = get_global_size(1); + + int slice_offset = idz*2; + + float x = slices[idx + idy * sizex + (slice_offset) * sizex * sizey]; + float y = slices[idx + idy * sizex + (slice_offset+1) * sizex * sizey]; + + write_imagef(interleaved_slices, (int4)(idx, idy, idz, 0),(float4)(x,y,0.0f,0.0f)); + } + + kernel void + texture_single ( + read_only image2d_array_t slices, + global float2 *reconstructed_buffer, + float axis_pos, + float angle_step, + unsigned long size){ + + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + + float angle = idy * angle_step; + float r = fmin (axis_pos, size - axis_pos); + float d = idx - axis_pos + 0.5f; + float l = sqrt(8.0f*r*r - 4.0f*d*d); + + float2 D = (float2) (cos(angle), -sin(angle)); + + D = normalize(D); + + float2 N = (float2) (D.y, -D.x); + + float2 sample = d * D - l/2.0f * N + ((float2) (axis_pos, axis_pos)); + + float2 sum = {0.0f,0.0f}; + + for (int i = 0; i < l; i++) { + sum += read_imagef(slices, volumeSampler, (float4)((float2)sample,idz,0.0f)).xy; + sample += N; + } + + reconstructed_buffer[idx + idy*size + idz*size*size] = sum; + } + + + kernel void + uninterleave_single (global float2 *reconstructed_buffer, + global float *output) + { + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + const int sizex = get_global_size(0); + const int sizey = get_global_size(1); + int output_offset = idz*2; + + output[idx + idy*sizex + (output_offset)*sizex*sizey] = reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].x; + output[idx + idy*sizex + (output_offset+1)*sizex*sizey] = reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].y; + } + + kernel void + interleave_half ( global float *slices, + write_only image2d_array_t interleaved_slices) + { + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + + const int sizex = get_global_size(0); + const int sizey = get_global_size(1); + + int slice_offset = idz*4; + + float4 b = {slices[idx + idy * sizex + (slice_offset) * sizex * sizey], + slices[idx + idy * sizex + (slice_offset+1) * sizex * sizey], + slices[idx + idy * sizex + (slice_offset+2) * sizex * sizey], + slices[idx + idy * sizex + (slice_offset+3) * sizex * sizey]}; + + write_imagef(interleaved_slices, (int4)(idx, idy, idz, 0),(float4)(b)); + } + +kernel void +texture_half ( + read_only image2d_array_t slices, + global float4 *reconstructed_buffer, + float axis_pos, + float angle_step, + unsigned long size){ + + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + + float angle = idy * angle_step; + float r = fmin (axis_pos, size - axis_pos); + float d = idx - axis_pos + 0.5f; + float l = sqrt(8.0f*r*r - 4.0f*d*d); + + float2 D = (float2) (cos(angle), -sin(angle)); + + D = normalize(D); + + float2 N = (float2) (D.y, -D.x); + + float2 sample = d * D - l/2.0f * N + ((float2) (axis_pos, axis_pos)); + + float4 sum = {0.0f,0.0f,0.0f,0.0f}; + + for (int i = 0; i < l; i++) { + sum += read_imagef(slices, volumeSampler, (float4)((float2)sample,idz,0.0f)); + sample += N; + } + + reconstructed_buffer[idx + idy*size + idz*size*size] = sum; +} + + kernel void + uninterleave_half (global float4 *reconstructed_buffer, + global float *output) + { + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + + const int sizex = get_global_size(0); + const int sizey = get_global_size(1); + + int output_offset = idz*4; + + output[idx + idy*sizex + (output_offset)*sizex*sizey] = reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].x; + output[idx + idy*sizex + (output_offset+1)*sizex*sizey] = reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].y; + output[idx + idy*sizex + (output_offset+2)*sizex*sizey] = reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].z; + output[idx + idy*sizex + (output_offset+3)*sizex*sizey] = reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].w; + } + + union converter { + uint2 storage; + uchar8 a; + }; + + kernel void + interleave_uint ( global float *slices, + write_only image2d_array_t interleaved_slices, + const float min, + const float max) + { + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + + const int sizex = get_global_size(0); + const int sizey = get_global_size(1); + + int slice_offset = idz*8; + + const float scale = 255.0f / (max - min); + + union converter il; + il.a.s0 = (slices[idx + idy * sizex + (slice_offset) * sizex * sizey] - min)*scale; + il.a.s1 = (slices[idx + idy * sizex + (slice_offset+1) * sizex * sizey] - min)*scale; + il.a.s2 = (slices[idx + idy * sizex + (slice_offset+2) * sizex * sizey] - min)*scale; + il.a.s3 = (slices[idx + idy * sizex + (slice_offset+3) * sizex * sizey] - min)*scale; + il.a.s4 = (slices[idx + idy * sizex + (slice_offset+4) * sizex * sizey] - min)*scale; + il.a.s5 = (slices[idx + idy * sizex + (slice_offset+5) * sizex * sizey] - min)*scale; + il.a.s6 = (slices[idx + idy * sizex + (slice_offset+6) * sizex * sizey] - min)*scale; + il.a.s7 = (slices[idx + idy * sizex + (slice_offset+7) * sizex * sizey] - min)*scale; + + write_imageui(interleaved_slices, (int4)(idx, idy, idz, 0),(uint4)((uint)il.storage.x,(uint)il.storage.y,0,0)); + } + +kernel void +texture_uint ( + read_only image2d_array_t slices, + global uint8 *reconstructed_buffer, + float axis_pos, + float angle_step, + unsigned long size){ + + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + + float angle = idy * angle_step; + float r = fmin (axis_pos, size - axis_pos); + float d = idx - axis_pos + 0.5f; + float l = sqrt(8.0f*r*r - 4.0f*d*d); + + float2 D = (float2) (cos(angle), -sin(angle)); + + D = normalize(D); + + float2 N = (float2) (D.y, -D.x); + + float2 sample = d * D - l/2.0f * N + ((float2) (axis_pos, axis_pos)); + + uint8 sum = {0,0,0,0,0,0,0,0}; + + union converter tex; + for (int i = 0; i < l; i++) { + tex.storage = read_imageui(slices, volumeSampler, (float4)((float2)sample,idz,0.0f)).xy; + + sum.s0 += (uint)tex.a.s0; + sum.s1 += (uint)tex.a.s1; + sum.s2 += (uint)tex.a.s2; + sum.s3 += (uint)tex.a.s3; + sum.s4 += (uint)tex.a.s4; + sum.s5 += (uint)tex.a.s5; + sum.s6 += (uint)tex.a.s6; + sum.s7 += (uint)tex.a.s7; + + sample += N; + } + + reconstructed_buffer[idx + idy*size + idz*size*size] = sum; +} + + kernel void + uninterleave_uint (global uint8 *reconstructed_buffer, + global float *output, + const float min, + const float max) + { + const int idx = get_global_id(0); + const int idy = get_global_id(1); + const int idz = get_global_id(2); + + const int sizex = get_global_size(0); + const int sizey = get_global_size(1); + + int output_offset = idz*8; + float scale = (max-min)/255.0f; + + output[idx + idy*sizex + (output_offset)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s0)*scale+min; + output[idx + idy*sizex + (output_offset+1)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s1)*scale+min; + output[idx + idy*sizex + (output_offset+2)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s2)*scale+min; + output[idx + idy*sizex + (output_offset+3)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s3)*scale+min; + output[idx + idy*sizex + (output_offset+4)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s4)*scale+min; + output[idx + idy*sizex + (output_offset+5)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s5)*scale+min; + output[idx + idy*sizex + (output_offset+6)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s6)*scale+min; + output[idx + idy*sizex + (output_offset+7)*sizex*sizey] = (reconstructed_buffer[idx + idy*sizex + idz*sizex*sizey].s7)*scale+min; + } + diff --git a/src/kernels/z_kernel.cl b/src/kernels/z_kernel.cl new file mode 100644 index 0000000..12da57a --- /dev/null +++ b/src/kernels/z_kernel.cl @@ -0,0 +1,478 @@ +#define rotate() pixel.x -= x_center.x; \ + pixel.y -= y_center; \ + pixel.x = pixel.x * cos_roll + pixel.y * sin_roll; \ + pixel.y = -pixel.x * sin_roll + pixel.y * cos_roll; \ + pixel.x += x_center.x; \ + pixel.y += y_center; + +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_1 ( +read_only image2d_t projection_0, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float sines, + const float cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + voxel.z = mad((float) idz, z_region.y, z_region.x); + tmp = mad(voxel.z, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines, mad(voxel.y, sines, x_center.x)); + pixel.y = mad(tmp_x, sines, mad(tmp_y, cosines, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_2 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float2 sines, + const float2 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + voxel.z = mad((float) idz, z_region.y, z_region.x); + tmp = mad(voxel.z, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_4 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, +read_only image2d_t projection_2, +read_only image2d_t projection_3, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float4 sines, + const float4 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + voxel.z = mad((float) idz, z_region.y, z_region.x); + tmp = mad(voxel.z, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center.x)); + pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp)); + rotate (); + result += read_imagef (projection_2, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center.x)); + pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp)); + rotate (); + result += read_imagef (projection_3, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_8 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, +read_only image2d_t projection_2, +read_only image2d_t projection_3, +read_only image2d_t projection_4, +read_only image2d_t projection_5, +read_only image2d_t projection_6, +read_only image2d_t projection_7, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float8 sines, + const float8 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + voxel.z = mad((float) idz, z_region.y, z_region.x); + tmp = mad(voxel.z, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center.x)); + pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp)); + rotate (); + result += read_imagef (projection_2, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center.x)); + pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp)); + rotate (); + result += read_imagef (projection_3, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s4, mad(voxel.y, sines.s4, x_center.x)); + pixel.y = mad(tmp_x, sines.s4, mad(tmp_y, cosines.s4, tmp)); + rotate (); + result += read_imagef (projection_4, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s5, mad(voxel.y, sines.s5, x_center.x)); + pixel.y = mad(tmp_x, sines.s5, mad(tmp_y, cosines.s5, tmp)); + rotate (); + result += read_imagef (projection_5, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s6, mad(voxel.y, sines.s6, x_center.x)); + pixel.y = mad(tmp_x, sines.s6, mad(tmp_y, cosines.s6, tmp)); + rotate (); + result += read_imagef (projection_6, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s7, mad(voxel.y, sines.s7, x_center.x)); + pixel.y = mad(tmp_x, sines.s7, mad(tmp_y, cosines.s7, tmp)); + rotate (); + result += read_imagef (projection_7, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} +/* + * Copyright (C) 2015-2016 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +kernel void backproject_burst_16 ( +read_only image2d_t projection_0, +read_only image2d_t projection_1, +read_only image2d_t projection_2, +read_only image2d_t projection_3, +read_only image2d_t projection_4, +read_only image2d_t projection_5, +read_only image2d_t projection_6, +read_only image2d_t projection_7, +read_only image2d_t projection_8, +read_only image2d_t projection_9, +read_only image2d_t projection_10, +read_only image2d_t projection_11, +read_only image2d_t projection_12, +read_only image2d_t projection_13, +read_only image2d_t projection_14, +read_only image2d_t projection_15, + global float *volume, + const sampler_t sampler, + const int3 real_size, + const float2 x_center, + const float y_center, + const float2 x_region, + const float2 y_region, + const float2 z_region, + const float2 lamino_region, + const float2 roll_region, + float sin_lamino, + float cos_lamino, + const float16 sines, + const float16 cosines, + const float norm_factor, + float sin_roll, + float cos_roll, + const int cumulate) +{ + int idx = get_global_id (0); + int idy = get_global_id (1); + int idz = get_global_id (2); + float result, tmp, tmp_x, tmp_y; + float2 pixel; + float3 voxel; + + if (idx < real_size.x && idy < real_size.y && idz < real_size.z) { + voxel.x = mad((float) idx, x_region.y, x_region.x); + voxel.y = mad((float) idy, y_region.y, y_region.x); + voxel.z = mad((float) idz, z_region.y, z_region.x); + tmp = mad(voxel.z, sin_lamino, y_center); + tmp_x = voxel.x * cos_lamino; + tmp_y = -voxel.y * cos_lamino; + + pixel.x = mad(voxel.x, cosines.s0, mad(voxel.y, sines.s0, x_center.x)); + pixel.y = mad(tmp_x, sines.s0, mad(tmp_y, cosines.s0, tmp)); + rotate (); + result = read_imagef (projection_0, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s1, mad(voxel.y, sines.s1, x_center.x)); + pixel.y = mad(tmp_x, sines.s1, mad(tmp_y, cosines.s1, tmp)); + rotate (); + result += read_imagef (projection_1, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s2, mad(voxel.y, sines.s2, x_center.x)); + pixel.y = mad(tmp_x, sines.s2, mad(tmp_y, cosines.s2, tmp)); + rotate (); + result += read_imagef (projection_2, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s3, mad(voxel.y, sines.s3, x_center.x)); + pixel.y = mad(tmp_x, sines.s3, mad(tmp_y, cosines.s3, tmp)); + rotate (); + result += read_imagef (projection_3, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s4, mad(voxel.y, sines.s4, x_center.x)); + pixel.y = mad(tmp_x, sines.s4, mad(tmp_y, cosines.s4, tmp)); + rotate (); + result += read_imagef (projection_4, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s5, mad(voxel.y, sines.s5, x_center.x)); + pixel.y = mad(tmp_x, sines.s5, mad(tmp_y, cosines.s5, tmp)); + rotate (); + result += read_imagef (projection_5, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s6, mad(voxel.y, sines.s6, x_center.x)); + pixel.y = mad(tmp_x, sines.s6, mad(tmp_y, cosines.s6, tmp)); + rotate (); + result += read_imagef (projection_6, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s7, mad(voxel.y, sines.s7, x_center.x)); + pixel.y = mad(tmp_x, sines.s7, mad(tmp_y, cosines.s7, tmp)); + rotate (); + result += read_imagef (projection_7, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s8, mad(voxel.y, sines.s8, x_center.x)); + pixel.y = mad(tmp_x, sines.s8, mad(tmp_y, cosines.s8, tmp)); + rotate (); + result += read_imagef (projection_8, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.s9, mad(voxel.y, sines.s9, x_center.x)); + pixel.y = mad(tmp_x, sines.s9, mad(tmp_y, cosines.s9, tmp)); + rotate (); + result += read_imagef (projection_9, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sa, mad(voxel.y, sines.sa, x_center.x)); + pixel.y = mad(tmp_x, sines.sa, mad(tmp_y, cosines.sa, tmp)); + rotate (); + result += read_imagef (projection_10, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sb, mad(voxel.y, sines.sb, x_center.x)); + pixel.y = mad(tmp_x, sines.sb, mad(tmp_y, cosines.sb, tmp)); + rotate (); + result += read_imagef (projection_11, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sc, mad(voxel.y, sines.sc, x_center.x)); + pixel.y = mad(tmp_x, sines.sc, mad(tmp_y, cosines.sc, tmp)); + rotate (); + result += read_imagef (projection_12, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sd, mad(voxel.y, sines.sd, x_center.x)); + pixel.y = mad(tmp_x, sines.sd, mad(tmp_y, cosines.sd, tmp)); + rotate (); + result += read_imagef (projection_13, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.se, mad(voxel.y, sines.se, x_center.x)); + pixel.y = mad(tmp_x, sines.se, mad(tmp_y, cosines.se, tmp)); + rotate (); + result += read_imagef (projection_14, sampler, pixel).x; + pixel.x = mad(voxel.x, cosines.sf, mad(voxel.y, sines.sf, x_center.x)); + pixel.y = mad(tmp_x, sines.sf, mad(tmp_y, cosines.sf, tmp)); + rotate (); + result += read_imagef (projection_15, sampler, pixel).x; + + + if (cumulate) { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] += result * norm_factor; + } else { + volume[idz * real_size.x * real_size.y + idy * real_size.x + idx] = result * norm_factor; + } + } +} + diff --git a/src/ufo-backproject-task.c b/src/ufo-backproject-task.c index ca002ea..580d1bc 100644 --- a/src/ufo-backproject-task.c +++ b/src/ufo-backproject-task.c @@ -25,6 +25,7 @@ #endif #include <math.h> +#include <stdio.h> #include "ufo-backproject-task.h" @@ -60,6 +61,7 @@ struct _UfoBackprojectTaskPrivate { gint roi_width; gint roi_height; Mode mode; + size_t out_mem_size; }; static void ufo_task_interface_init (UfoTaskIface *iface); @@ -144,8 +146,14 @@ ufo_backproject_task_process (UfoTask *task, UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel, 8, sizeof (gfloat), &axis_pos)); profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task)); + ufo_profiler_enable_tracing(profiler, TRUE); ufo_profiler_call (profiler, cmd_queue, kernel, 2, requisition->dims, NULL); - + + size_t temp_size; + clGetMemObjectInfo(out_mem,CL_MEM_SIZE,sizeof(temp_size),&temp_size,NULL); + priv->out_mem_size += temp_size; + //fprintf(stdout, "Time taken GPU: %f Size: %zu \n", ufo_profiler_elapsed(profiler, UFO_PROFILER_TIMER_GPU), priv->out_mem_size); + //fprintf(stdout, "Time taken: %f \n",ufo_profiler_elapsed(profiler,UFO_PROFILER_TIMER_GPU)); return TRUE; } @@ -538,4 +546,6 @@ ufo_backproject_task_init (UfoBackprojectTask *self) priv->luts_changed = TRUE; priv->roi_x = priv->roi_y = 0; priv->roi_width = priv->roi_height = 0; + priv->out_mem_size = 0; } + diff --git a/src/ufo-forwardproject-task.c b/src/ufo-forwardproject-task.c index 2058614..ac89879 100644 --- a/src/ufo-forwardproject-task.c +++ b/src/ufo-forwardproject-task.c @@ -24,6 +24,7 @@ #include <CL/cl.h> #endif +#include <stdio.h> #include "ufo-forwardproject-task.h" @@ -33,6 +34,7 @@ struct _UfoForwardprojectTaskPrivate { gfloat axis_pos; gfloat angle_step; guint num_projections; + size_t out_mem_size; }; static void ufo_task_interface_init (UfoTaskIface *iface); @@ -137,6 +139,7 @@ ufo_forwardproject_task_process (UfoTask *task, in_mem = ufo_buffer_get_device_image (inputs[0], cmd_queue); out_mem = ufo_buffer_get_device_array (output, cmd_queue); profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task)); + ufo_profiler_enable_tracing(profiler, TRUE); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 0, sizeof (cl_mem), &in_mem)); UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (priv->kernel, 1, sizeof (cl_mem), &out_mem)); @@ -145,6 +148,11 @@ ufo_forwardproject_task_process (UfoTask *task, ufo_profiler_call (profiler, cmd_queue, priv->kernel, 2, requisition->dims, NULL); + size_t temp_size; + clGetMemObjectInfo(out_mem,CL_MEM_SIZE,sizeof(temp_size),&temp_size,NULL); + priv->out_mem_size += temp_size; + //fprintf(stdout, "Time taken GPU: %f Size: %zu \n", ufo_profiler_elapsed(profiler, UFO_PROFILER_TIMER_GPU), priv->out_mem_size); + return TRUE; } @@ -268,4 +276,5 @@ ufo_forwardproject_task_init(UfoForwardprojectTask *self) self->priv->axis_pos = -G_MAXFLOAT; self->priv->num_projections = 256; self->priv->angle_step = 0; + self->priv->out_mem_size = 0; } diff --git a/src/ufo-stacked-backproject-task.c b/src/ufo-stacked-backproject-task.c index 5ddfd26..cbf2600 100644 --- a/src/ufo-stacked-backproject-task.c +++ b/src/ufo-stacked-backproject-task.c @@ -69,6 +69,7 @@ struct _UfoStackedBackprojectTaskPrivate { gint roi_width; gint roi_height; Precision precision; + size_t out_mem_size; }; static void ufo_task_interface_init (UfoTaskIface *iface); @@ -307,6 +308,8 @@ ufo_stacked_backproject_task_process (UfoTask *task, cmd_queue = ufo_gpu_node_get_cmd_queue(node); out_mem = ufo_buffer_get_device_array (output, cmd_queue); profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task)); + + ufo_profiler_enable_tracing(profiler, TRUE); /* Guess axis position if they are not provided by the user. */ if (priv->axis_pos <= 0.0) { @@ -348,13 +351,13 @@ ufo_stacked_backproject_task_process (UfoTask *task, buffer_size = sizeof(cl_float4) * dim_x * dim_y * quotient; format.image_channel_data_type = CL_HALF_FLOAT; }else if(priv->precision == INT8){ - quotient = requisition->dims[2]/4; + quotient = requisition->dims[2]/8; kernel_interleave = priv->interleave_uint; kernel_texture = priv->texture_uint; kernel_uninterleave = priv->uninterleave_uint; - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = CL_UNSIGNED_INT8; - buffer_size = sizeof(cl_uint4) * dim_x * dim_y * quotient; + format.image_channel_order = CL_RG; + format.image_channel_data_type = CL_UNSIGNED_INT32; + buffer_size = sizeof(cl_uint8) * dim_x * dim_y * quotient; } cl_image_desc imageDesc; @@ -424,7 +427,13 @@ ufo_stacked_backproject_task_process (UfoTask *task, UFO_RESOURCES_CHECK_CLERR(clReleaseMemObject(interleaved_img)); UFO_RESOURCES_CHECK_CLERR(clReleaseMemObject(reconstructed_buffer)); } + + size_t temp_size; + clGetMemObjectInfo(out_mem,CL_MEM_SIZE,sizeof(temp_size),&temp_size,NULL); + priv->out_mem_size += temp_size; + //fprintf(stdout, "Time taken GPU: %f Size: %zu \n", ufo_profiler_elapsed(profiler, UFO_PROFILER_TIMER_GPU), priv->out_mem_size); +// fprintf(stdout, "Time taken: %f \n",ufo_profiler_elapsed(profiler,UFO_PROFILER_TIMER_GPU)); return TRUE; } @@ -716,4 +725,6 @@ ufo_stacked_backproject_task_init(UfoStackedBackprojectTask *self) priv->roi_x = priv->roi_y = 0; priv->roi_width = priv->roi_height = 0; priv->precision = SINGLE; -}
\ No newline at end of file + priv->out_mem_size = 0; +} + diff --git a/src/ufo-stacked-backproject-task.h b/src/ufo-stacked-backproject-task.h index 30f4d27..2ab383b 100644 --- a/src/ufo-stacked-backproject-task.h +++ b/src/ufo-stacked-backproject-task.h @@ -50,4 +50,4 @@ GType ufo_stacked_backproject_task_get_type (void); G_END_DECLS -#endif
\ No newline at end of file +#endif diff --git a/src/ufo-stacked-forwardproject-task.c b/src/ufo-stacked-forwardproject-task.c new file mode 100644 index 0000000..4518762 --- /dev/null +++ b/src/ufo-stacked-forwardproject-task.c @@ -0,0 +1,502 @@ +/* + * Copyright (C) 2011-2015 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "config.h" +#ifdef __APPLE__ +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif + +#include "ufo-stacked-forwardproject-task.h" + +#include <stdio.h> + +typedef enum { + INT8, + HALF, + SINGLE +} Precision; + +static GEnumValue precision_values[] = { + {INT8,"INT8","int8"}, + {HALF, "HALF", "half"}, + {SINGLE, "SINGLE", "single"} +}; + +struct _UfoStackedForwardprojectTaskPrivate { + cl_context context; + cl_kernel interleave_single; + cl_kernel texture_single; + cl_kernel uninterleave_single; + cl_kernel interleave_half; + cl_kernel texture_half; + cl_kernel uninterleave_half; + cl_kernel texture_uint; + cl_kernel interleave_uint; + cl_kernel uninterleave_uint; + gfloat angle_step; + gfloat axis_pos; + guint num_projections; + Precision precision; + size_t out_mem_size; +}; + +static void ufo_task_interface_init (UfoTaskIface *iface); + +G_DEFINE_TYPE_WITH_CODE (UfoStackedForwardprojectTask, ufo_stacked_forwardproject_task, UFO_TYPE_TASK_NODE, + G_IMPLEMENT_INTERFACE (UFO_TYPE_TASK, + ufo_task_interface_init)) + +#define UFO_STACKED_FORWARDPROJECT_TASK_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE((obj), UFO_TYPE_STACKED_FORWARDPROJECT_TASK, UfoStackedForwardprojectTaskPrivate)) + +enum { + PROP_0, + PROP_AXIS_POSITION, + PROP_ANGLE_STEP, + PROP_NUM_PROJECTIONS, + PROP_PRECISION, + N_PROPERTIES +}; + +static GParamSpec *properties[N_PROPERTIES] = { NULL, }; + +UfoNode * +ufo_stacked_forwardproject_task_new (void) +{ + return UFO_NODE (g_object_new (UFO_TYPE_STACKED_FORWARDPROJECT_TASK, NULL)); +} + +static void +ufo_stacked_forwardproject_task_setup (UfoTask *task, + UfoResources *resources, + GError **error) +{ + UfoStackedForwardprojectTaskPrivate *priv; + + priv = UFO_STACKED_FORWARDPROJECT_TASK(task)->priv; + + priv->context = ufo_resources_get_context (resources); + priv->interleave_single = ufo_resources_get_kernel (resources, "stacked-forwardproject.cl", "interleave_single", NULL, error); + priv->texture_single = ufo_resources_get_kernel (resources, "stacked-forwardproject.cl", "texture_single", NULL, error); + priv->uninterleave_single = ufo_resources_get_kernel (resources, "stacked-forwardproject.cl", "uninterleave_single", NULL, error); + + priv->interleave_half = ufo_resources_get_kernel (resources, "stacked-forwardproject.cl", "interleave_half", NULL, error); + priv->texture_half = ufo_resources_get_kernel (resources, "stacked-forwardproject.cl", "texture_half", NULL, error); + priv->uninterleave_half = ufo_resources_get_kernel (resources, "stacked-forwardproject.cl", "uninterleave_half", NULL, error); + + priv->interleave_uint = ufo_resources_get_kernel (resources, "stacked-forwardproject.cl", "interleave_uint", NULL, error); + priv->texture_uint = ufo_resources_get_kernel (resources, "stacked-forwardproject.cl", "texture_uint", NULL, error); + priv->uninterleave_uint = ufo_resources_get_kernel (resources, "stacked-forwardproject.cl", "uninterleave_uint", NULL, error); + + UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainContext (priv->context), error); + + if (priv->interleave_single != NULL) + UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->interleave_single), error); + + if (priv->texture_single != NULL) + UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->texture_single), error); + + if (priv->uninterleave_single != NULL) + UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->uninterleave_single), error); + + if (priv->interleave_half != NULL) + UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->interleave_half), error); + + if (priv->texture_half != NULL) + UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->texture_half), error); + + if (priv->uninterleave_half != NULL) + UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->uninterleave_half), error); + + if (priv->interleave_uint != NULL) + UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->interleave_uint), error); + + if (priv->texture_uint != NULL) + UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->texture_uint), error); + + if (priv->uninterleave_uint != NULL) + UFO_RESOURCES_CHECK_SET_AND_RETURN (clRetainKernel (priv->uninterleave_uint), error); + + if (priv->angle_step == 0) + priv->angle_step = G_PI / priv->num_projections; +} + +static void +ufo_stacked_forwardproject_task_get_requisition (UfoTask *task, + UfoBuffer **inputs, + UfoRequisition *requisition, + GError **error) +{ + UfoStackedForwardprojectTaskPrivate *priv; + UfoRequisition in_req; + + priv = UFO_STACKED_FORWARDPROJECT_TASK(task)->priv; + + ufo_buffer_get_requisition (inputs[0], &in_req); + + requisition->n_dims = 3; + requisition->dims[0] = in_req.dims[0]; + requisition->dims[1] = priv->num_projections; + requisition->dims[2] = in_req.dims[2]; + if (priv->axis_pos == -G_MAXFLOAT) { + priv->axis_pos = in_req.dims[0] / 2.0f; + } +} + +static guint +ufo_stacked_forwardproject_task_get_num_inputs (UfoTask *task) +{ + return 1; +} + +static guint +ufo_stacked_forwardproject_task_get_num_dimensions (UfoTask *task, + guint input) +{ + g_return_val_if_fail (input == 0, 0); + return 3; +} + +static UfoTaskMode +ufo_stacked_forwardproject_task_get_mode (UfoTask *task) +{ + return UFO_TASK_MODE_PROCESSOR | UFO_TASK_MODE_GPU; +} + +static gboolean +ufo_stacked_forwardproject_task_process (UfoTask *task, + UfoBuffer **inputs, + UfoBuffer *output, + UfoRequisition *requisition) +{ + UfoStackedForwardprojectTaskPrivate *priv; + UfoGpuNode *node; + UfoProfiler *profiler; + cl_command_queue cmd_queue; + cl_mem interleaved_img; + cl_mem out_mem; + cl_mem reconstructed_buffer; + cl_mem device_array; + + cl_kernel kernel_interleave; + cl_kernel kernel_texture; + cl_kernel kernel_uninterleave; + + size_t buffer_size; + + priv = UFO_STACKED_FORWARDPROJECT_TASK (task)->priv; + node = UFO_GPU_NODE (ufo_task_node_get_proc_node (UFO_TASK_NODE (task))); + cmd_queue = ufo_gpu_node_get_cmd_queue (node); + out_mem = ufo_buffer_get_device_array (output, cmd_queue); + profiler = ufo_task_node_get_profiler (UFO_TASK_NODE (task)); + + ufo_profiler_enable_tracing(profiler,TRUE); + + // Image format + cl_image_format format; + device_array = ufo_buffer_get_device_array(inputs[0],cmd_queue); + + UfoRequisition req; + ufo_buffer_get_requisition(inputs[0],&req); + + unsigned long dim_x = (requisition->dims[0]%16 == 0) ? requisition->dims[0] : (((requisition->dims[0]/16)+1)*16); + unsigned long dim_y = (requisition->dims[1]%16 == 0) ? requisition->dims[1] : (((requisition->dims[1]/16)+1)*16); + unsigned long quotient; + + if(priv->precision == SINGLE){ + quotient = requisition->dims[2]/2; + kernel_interleave = priv->interleave_single; + kernel_texture = priv->texture_single; + kernel_uninterleave = priv->uninterleave_single; + format.image_channel_order = CL_RG; + format.image_channel_data_type = CL_FLOAT; + buffer_size = sizeof(cl_float2) * dim_x * dim_y * quotient; + }else if(priv->precision == HALF){ + quotient = requisition->dims[2]/4; + kernel_interleave = priv->interleave_half; + kernel_texture = priv->texture_half; + kernel_uninterleave = priv->uninterleave_half; + format.image_channel_order = CL_RGBA; + buffer_size = sizeof(cl_float4) * dim_x * dim_y * quotient; + format.image_channel_data_type = CL_HALF_FLOAT; + }else if(priv->precision == INT8){ + quotient = requisition->dims[2]/8; + kernel_interleave = priv->interleave_uint; + kernel_texture = priv->texture_uint; + kernel_uninterleave = priv->uninterleave_uint; + format.image_channel_order = CL_RG; + format.image_channel_data_type = CL_UNSIGNED_INT32; + buffer_size = sizeof(cl_uint8) * dim_x * dim_y * quotient; + } + + cl_image_desc imageDesc; + imageDesc.image_width = req.dims[0]; + imageDesc.image_height = req.dims[1]; + imageDesc.image_depth = 0; + imageDesc.image_array_size = quotient; + imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + imageDesc.image_slice_pitch = 0; + imageDesc.image_row_pitch = 0; + imageDesc.num_mip_levels = 0; + imageDesc.num_samples = 0; + imageDesc.buffer = NULL; + + float max_element; + float min_element; + + if(quotient > 0){ + // Interleave + interleaved_img = clCreateImage(priv->context, CL_MEM_READ_WRITE, &format, &imageDesc, NULL, 0); + + UFO_RESOURCES_CHECK_CLERR(clSetKernelArg(kernel_interleave, 0, sizeof(cl_mem), &device_array)); + UFO_RESOURCES_CHECK_CLERR(clSetKernelArg(kernel_interleave, 1, sizeof(cl_mem), &interleaved_img)); + + if(priv->precision == INT8){ + //Normalize i.e convert float array to 0-255 + float *host = ufo_buffer_get_host_array(inputs[0],cmd_queue); + min_element = ufo_buffer_min(inputs[0],cmd_queue); + max_element = ufo_buffer_max(inputs[0],cmd_queue); + + UFO_RESOURCES_CHECK_CLERR(clSetKernelArg(kernel_interleave, 2, sizeof(float), &min_element)); + UFO_RESOURCES_CHECK_CLERR(clSetKernelArg(kernel_interleave, 3, sizeof(float), &max_element)); + } + + size_t gsize_interleave[3] = {req.dims[0],req.dims[1],quotient}; + ufo_profiler_call(profiler, cmd_queue, kernel_interleave, 3, gsize_interleave, NULL); + + //Forward projection + reconstructed_buffer = clCreateBuffer(priv->context, CL_MEM_READ_WRITE, buffer_size, NULL, 0); + + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel_texture, 0, sizeof (cl_mem), &interleaved_img)); + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel_texture, 1, sizeof (cl_mem), &reconstructed_buffer)); + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel_texture, 2, sizeof (gfloat), &priv->axis_pos)); + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel_texture, 3, sizeof (gfloat), &priv->angle_step)); + UFO_RESOURCES_CHECK_CLERR (clSetKernelArg (kernel_texture, 4, sizeof(unsigned long), &requisition->dims[0])); + + size_t gsize_texture[3] = {dim_x,dim_y,quotient}; + size_t lSize[3] = {16,16,1}; + ufo_profiler_call(profiler, cmd_queue, kernel_texture, 3, gsize_texture, lSize); + + //Uninterleave + UFO_RESOURCES_CHECK_CLERR(clSetKernelArg(kernel_uninterleave, 0, sizeof(cl_mem), &reconstructed_buffer)); + UFO_RESOURCES_CHECK_CLERR(clSetKernelArg(kernel_uninterleave, 1, sizeof(cl_mem), &out_mem)); + if(priv->precision == INT8){ + UFO_RESOURCES_CHECK_CLERR(clSetKernelArg(kernel_uninterleave, 2, sizeof(float), &min_element)); + UFO_RESOURCES_CHECK_CLERR(clSetKernelArg(kernel_uninterleave, 3, sizeof(float), &max_element)); + } + + size_t gsize_uninterleave[3] = {requisition->dims[0],requisition->dims[1],quotient}; + ufo_profiler_call(profiler, cmd_queue, kernel_uninterleave, 3, gsize_uninterleave, NULL); + + UFO_RESOURCES_CHECK_CLERR(clReleaseMemObject(interleaved_img)); + UFO_RESOURCES_CHECK_CLERR(clReleaseMemObject(reconstructed_buffer)); + } + + size_t temp_size; + clGetMemObjectInfo(out_mem,CL_MEM_SIZE,sizeof(temp_size),&temp_size,NULL); + priv->out_mem_size += temp_size; + //fprintf(stdout, "Time taken GPU: %f Size: %zu \n", ufo_profiler_elapsed(profiler, UFO_PROFILER_TIMER_GPU), priv->out_mem_size); + +// fprintf(stdout, "Time taken: %f \n",ufo_profiler_elapsed(profiler,UFO_PROFILER_TIMER_GPU)); + return TRUE; +} + + +static void +ufo_stacked_forwardproject_task_set_property (GObject *object, + guint property_id, + const GValue *value, + GParamSpec *pspec) +{ + UfoStackedForwardprojectTaskPrivate *priv = UFO_STACKED_FORWARDPROJECT_TASK_GET_PRIVATE (object); + + switch (property_id) { + case PROP_AXIS_POSITION: + priv->axis_pos = g_value_get_float (value); + break; + case PROP_ANGLE_STEP: + priv->angle_step = g_value_get_float(value); + break; + case PROP_NUM_PROJECTIONS: + priv->num_projections = g_value_get_uint(value); + break; + case PROP_PRECISION: + priv->precision = g_value_get_enum(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); + break; + } +} + +static void +ufo_stacked_forwardproject_task_get_property (GObject *object, + guint property_id, + GValue *value, + GParamSpec *pspec) +{ + UfoStackedForwardprojectTaskPrivate *priv = UFO_STACKED_FORWARDPROJECT_TASK_GET_PRIVATE (object); + + switch (property_id) { + case PROP_AXIS_POSITION: + g_value_set_float (value, priv->axis_pos); + break; + case PROP_ANGLE_STEP: + g_value_set_float(value, priv->angle_step); + break; + case PROP_NUM_PROJECTIONS: + g_value_set_uint(value, priv->num_projections); + break; + case PROP_PRECISION: + g_value_set_enum(value,priv->precision); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); + break; + } +} + +static void +ufo_stacked_forwardproject_task_finalize (GObject *object) +{ + UfoStackedForwardprojectTaskPrivate *priv; + + priv = UFO_STACKED_FORWARDPROJECT_TASK_GET_PRIVATE (object); + + if (priv->interleave_single) { + UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->interleave_single)); + priv->interleave_single = NULL; + } + + if (priv->texture_single) { + UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->texture_single)); + priv->texture_single = NULL; + } + + if (priv->uninterleave_single) { + UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->uninterleave_single)); + priv->uninterleave_single = NULL; + } + + if (priv->interleave_half) { + UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->interleave_half)); + priv->interleave_half = NULL; + } + + if (priv->texture_half) { + UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->texture_half)); + priv->texture_half = NULL; + } + + if (priv->uninterleave_half) { + UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->uninterleave_half)); + priv->uninterleave_half = NULL; + } + + if (priv->interleave_uint) { + UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->interleave_uint)); + priv->interleave_uint = NULL; + } + + if (priv->texture_uint) { + UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->texture_uint)); + priv->texture_uint = NULL; + } + + if (priv->uninterleave_uint) { + UFO_RESOURCES_CHECK_CLERR (clReleaseKernel (priv->uninterleave_uint)); + priv->uninterleave_uint = NULL; + } + + if (priv->context) { + UFO_RESOURCES_CHECK_CLERR (clReleaseContext (priv->context)); + priv->context = NULL; + } + + G_OBJECT_CLASS (ufo_stacked_forwardproject_task_parent_class)->finalize (object); +} + +static void +ufo_task_interface_init (UfoTaskIface *iface) +{ + iface->setup = ufo_stacked_forwardproject_task_setup; + iface->get_num_inputs = ufo_stacked_forwardproject_task_get_num_inputs; + iface->get_num_dimensions = ufo_stacked_forwardproject_task_get_num_dimensions; + iface->get_mode = ufo_stacked_forwardproject_task_get_mode; + iface->get_requisition = ufo_stacked_forwardproject_task_get_requisition; + iface->process = ufo_stacked_forwardproject_task_process; +} + +static void +ufo_stacked_forwardproject_task_class_init (UfoStackedForwardprojectTaskClass *klass) +{ + GObjectClass *oclass = G_OBJECT_CLASS (klass); + + oclass->set_property = ufo_stacked_forwardproject_task_set_property; + oclass->get_property = ufo_stacked_forwardproject_task_get_property; + oclass->finalize = ufo_stacked_forwardproject_task_finalize; + + properties[PROP_AXIS_POSITION] = + g_param_spec_float ("axis-pos", + "Position of rotation axis", + "Position of rotation axis", + -G_MAXFLOAT, G_MAXFLOAT, -G_MAXFLOAT, + G_PARAM_READWRITE); + + properties[PROP_ANGLE_STEP] = + g_param_spec_float("angle-step", + "Increment of angle in radians", + "Increment of angle in radians", + -4.0f * ((gfloat) G_PI), + +4.0f * ((gfloat) G_PI), + 0.0f, + G_PARAM_READWRITE); + + properties[PROP_NUM_PROJECTIONS] = + g_param_spec_uint("number", + "Number of projections", + "Number of projections", + 1, 32768, 256, + G_PARAM_READWRITE); + + properties[PROP_PRECISION] = + g_param_spec_enum("precision-mode", + "Precision mode (\"int8\", \"half\", \"single\")", + "Precision mode (\"int8\", \"half\", \"single\")", + g_enum_register_static("ufo_stacked_forwardproject_precision", precision_values), + SINGLE, G_PARAM_READWRITE); + + for (guint i = PROP_0 + 1; i < N_PROPERTIES; i++) + g_object_class_install_property (oclass, i, properties[i]); + + g_type_class_add_private (oclass, sizeof(UfoStackedForwardprojectTaskPrivate)); +} + +static void +ufo_stacked_forwardproject_task_init(UfoStackedForwardprojectTask *self) +{ + self->priv = UFO_STACKED_FORWARDPROJECT_TASK_GET_PRIVATE(self); + + self->priv->axis_pos = -G_MAXFLOAT; + self->priv->num_projections = 256; + self->priv->angle_step = 0; + self->priv->precision = SINGLE; + self->priv->out_mem_size = 0; +} diff --git a/src/ufo-stacked-forwardproject-task.h b/src/ufo-stacked-forwardproject-task.h new file mode 100644 index 0000000..7f793aa --- /dev/null +++ b/src/ufo-stacked-forwardproject-task.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2011-2013 Karlsruhe Institute of Technology + * + * This file is part of Ufo. + * + * This library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __UFO_STACKED_FORWARDPROJECT_TASK_H +#define __UFO_STACKED_FORWARDPROJECT_TASK_H + +#include <ufo/ufo.h> + +G_BEGIN_DECLS + +#define UFO_TYPE_STACKED_FORWARDPROJECT_TASK (ufo_stacked_forwardproject_task_get_type()) +#define UFO_STACKED_FORWARDPROJECT_TASK(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj), UFO_TYPE_STACKED_FORWARDPROJECT_TASK, UfoStackedForwardprojectTask)) +#define UFO_IS_STACKED_FORWARDPROJECT_TASK(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj), UFO_TYPE_STACKED_FORWARDPROJECT_TASK)) +#define UFO_STACKED_FORWARDPROJECT_TASK_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST((klass), UFO_TYPE_STACKED_FORWARDPROJECT_TASK, UfoStackedForwardprojectTaskClass)) +#define UFO_IS_STACKED_FORWARDPROJECT_TASK_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE((klass), UFO_TYPE_STACKED_FORWARDPROJECT_TASK)) +#define UFO_STACKED_FORWARDPROJECT_TASK_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS((obj), UFO_TYPE_STACKED_FORWARDPROJECT_TASK, UfoStackedForwardprojectTaskClass)) + +typedef struct _UfoStackedForwardprojectTask UfoStackedForwardprojectTask; +typedef struct _UfoStackedForwardprojectTaskClass UfoStackedForwardprojectTaskClass; +typedef struct _UfoStackedForwardprojectTaskPrivate UfoStackedForwardprojectTaskPrivate; + +struct _UfoStackedForwardprojectTask { + UfoTaskNode parent_instance; + + UfoStackedForwardprojectTaskPrivate *priv; +}; + +struct _UfoStackedForwardprojectTaskClass { + UfoTaskNodeClass parent_class; +}; + +UfoNode *ufo_stacked_forwardproject_task_new (void); +GType ufo_stacked_forwardproject_task_get_type (void); + +G_END_DECLS + +#endif |