diff options
author | Matthias Vogelgesang <matthias.vogelgesang@kit.edu> | 2011-12-01 16:00:59 +0100 |
---|---|---|
committer | Matthias Vogelgesang <matthias.vogelgesang@kit.edu> | 2011-12-01 16:00:59 +0100 |
commit | cb3a933340614866f7b6a3dd0977956450a161dd (patch) | |
tree | bb386a33530c89e815c0eb4850d98d4f58bffccb | |
parent | 845a638d4c7183ed91b223fbd5047a9cc27b47c8 (diff) | |
download | ufodecode-cb3a933340614866f7b6a3dd0977956450a161dd.tar.gz ufodecode-cb3a933340614866f7b6a3dd0977956450a161dd.tar.bz2 ufodecode-cb3a933340614866f7b6a3dd0977956450a161dd.tar.xz ufodecode-cb3a933340614866f7b6a3dd0977956450a161dd.zip |
Add: optional SSE intrinsics
-rw-r--r-- | CMakeLists.txt | 29 | ||||
-rw-r--r-- | src/config.h.in | 1 | ||||
-rw-r--r-- | src/libipe.c | 54 |
3 files changed, 78 insertions, 6 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f519ee..f541898 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,13 +28,40 @@ if(NOT DEFINED LOCALE_INSTALL_DIR) endif(NOT DEFINED LOCALE_INSTALL_DIR) +# --- Look for SSE support -------------------------------------------------- +include(CheckCXXSourceRuns) +set(SSE_FLAGS) +if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) + set(CMAKE_REQUIRED_FLAGS "-msse") + check_cxx_source_runs(" + #include <xmmintrin.h> + int main() + { + __m128 a, b; + float vals[4] = {0}; + a = _mm_loadu_ps(vals); + b = a; + b = _mm_add_ps(a,b); + _mm_storeu_ps(vals,b); + return 0; + }" + SSE_AVAILABLE) + + set(CMAKE_REQUIRED_FLAGS) + + if (SSE_AVAILABLE) + option(HAVE_SSE "Use SSE extensions" ON) + set(SSE_FLAGS "-msse") + endif() +endif() + # --- Build library and install --------------------------------------------- include_directories( ${CMAKE_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR} ) -add_definitions("--std=c99 -Wall -O2") +add_definitions("--std=c99 -Wall -O2 ${SSE_FLAGS}") add_library(ipe SHARED src/libipe.c) diff --git a/src/config.h.in b/src/config.h.in index e5d0189..37a1399 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -1 +1,2 @@ #cmakedefine DEBUG +#cmakedefine HAVE_SSE diff --git a/src/libipe.c b/src/libipe.c index b0db74d..466056a 100644 --- a/src/libipe.c +++ b/src/libipe.c @@ -6,6 +6,7 @@ #include "libipe.h" #include "libipe-private.h" #include "config.h" +#include <xmmintrin.h> #define IPECAMERA_NUM_CHANNELS 16 #define IPECAMERA_PIXELS_PER_CHANNEL 128 @@ -84,6 +85,14 @@ static int ipe_decode_frame(uint16_t *pixel_buffer, uint32_t *raw, int num_rows, uint32_t data; const int bytes = 43; +#ifdef HAVE_SSE + const uint32_t mask = 0x3FF; + __m128i mmask = _mm_set_epi32(mask, mask, mask, mask); + __m128i packed; + __m128i tmp1, tmp2; + uint32_t result[4] __attribute__ ((aligned (16))) = {0}; +#endif + do { info = raw[0]; row = (info >> 4) & 0x7FF; @@ -104,16 +113,51 @@ static int ipe_decode_frame(uint16_t *pixel_buffer, uint32_t *raw, int num_rows, /* "Correct" missing pixel */ if ((row < 2) && (pixels == (IPECAMERA_PIXELS_PER_CHANNEL - 1))) { pixel_buffer[base] = 0; - base++; - } + /* base++; */ + } #ifdef DEBUG else CHECK_FLAG("number of pixels, %i is expected", pixels == IPECAMERA_PIXELS_PER_CHANNEL, pixels, IPECAMERA_PIXELS_PER_CHANNEL); #endif - for (int i = 1; i < bytes; i++) { +#ifdef HAVE_SSE + for (int i = 1 ; i < bytes-4; i += 4, base += 12) { + packed = _mm_set_epi32(raw[i], raw[i+1], raw[i+2], raw[i+3]); + + tmp1 = _mm_srli_epi32(packed, 20); + tmp2 = _mm_and_si128(tmp1, mmask); + _mm_storeu_si128((__m128i*) result, tmp2); + pixel_buffer[base] = result[0]; + pixel_buffer[base+3] = result[1]; + pixel_buffer[base+6] = result[2]; + pixel_buffer[base+9] = result[3]; + + tmp1 = _mm_srli_epi32(packed, 10); + tmp2 = _mm_and_si128(tmp1, mmask); + _mm_storeu_si128((__m128i*) result, tmp2); + pixel_buffer[base+1] = result[0]; + pixel_buffer[base+4] = result[1]; + pixel_buffer[base+7] = result[2]; + pixel_buffer[base+10] = result[3]; + + tmp1 = _mm_and_si128(packed, mmask); + _mm_storeu_si128((__m128i*) result, tmp1); + pixel_buffer[base+2] = result[0]; + pixel_buffer[base+5] = result[1]; + pixel_buffer[base+8] = result[2]; + pixel_buffer[base+11] = result[3]; + } + + /* Compute last pixels the usual way */ + for (int i = bytes-4; i < bytes; i++) { + data = raw[i]; + pixel_buffer[base++] = (data >> 20) & 0x3FF; + pixel_buffer[base++] = (data >> 10) & 0x3FF; + pixel_buffer[base++] = data & 0x3FF; + } +#else + for (int i = 1 ; i < bytes; i++) { data = raw[i]; - #ifdef DEBUG header = (data >> 30) & 0x03; CHECK_FLAG("raw data magick", header == 3, header); @@ -124,9 +168,9 @@ static int ipe_decode_frame(uint16_t *pixel_buffer, uint32_t *raw, int num_rows, pixel_buffer[base++] = (data >> 10) & 0x3FF; pixel_buffer[base++] = data & 0x3FF; } +#endif data = raw[bytes]; - #ifdef DEBUG header = (data >> 30) & 0x03; CHECK_FLAG("raw data magick", header == 3, header); |