summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Matthias Vogelgesang <matthias.vogelgesang@kit.edu> 2011-12-01 16:00:59 +0100
committer Matthias Vogelgesang <matthias.vogelgesang@kit.edu> 2011-12-01 16:00:59 +0100
commit cb3a933340614866f7b6a3dd0977956450a161dd (patch)
tree bb386a33530c89e815c0eb4850d98d4f58bffccb
parent 845a638d4c7183ed91b223fbd5047a9cc27b47c8 (diff)
download ufodecode-cb3a933340614866f7b6a3dd0977956450a161dd.tar.gz
ufodecode-cb3a933340614866f7b6a3dd0977956450a161dd.tar.bz2
ufodecode-cb3a933340614866f7b6a3dd0977956450a161dd.tar.xz
ufodecode-cb3a933340614866f7b6a3dd0977956450a161dd.zip
Add: optional SSE intrinsics
-rw-r--r-- CMakeLists.txt 29
-rw-r--r-- src/config.h.in 1
-rw-r--r-- src/libipe.c 54
3 files changed, 78 insertions, 6 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2f519ee..f541898 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,13 +28,40 @@ if(NOT DEFINED LOCALE_INSTALL_DIR)
endif(NOT DEFINED LOCALE_INSTALL_DIR)
+# --- Look for SSE support --------------------------------------------------
+include(CheckCXXSourceRuns)
+set(SSE_FLAGS)
+if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
+ set(CMAKE_REQUIRED_FLAGS "-msse")
+ check_cxx_source_runs("
+ #include <xmmintrin.h>
+ int main()
+ {
+ __m128 a, b;
+ float vals[4] = {0};
+ a = _mm_loadu_ps(vals);
+ b = a;
+ b = _mm_add_ps(a,b);
+ _mm_storeu_ps(vals,b);
+ return 0;
+ }"
+ SSE_AVAILABLE)
+
+ set(CMAKE_REQUIRED_FLAGS)
+
+ if (SSE_AVAILABLE)
+ option(HAVE_SSE "Use SSE extensions" ON)
+ set(SSE_FLAGS "-msse")
+ endif()
+endif()
+
# --- Build library and install ---------------------------------------------
include_directories(
${CMAKE_SOURCE_DIR}/src
${CMAKE_CURRENT_BINARY_DIR}
)
-add_definitions("--std=c99 -Wall -O2")
+add_definitions("--std=c99 -Wall -O2 ${SSE_FLAGS}")
add_library(ipe SHARED src/libipe.c)
diff --git a/src/config.h.in b/src/config.h.in
index e5d0189..37a1399 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -1 +1,2 @@
#cmakedefine DEBUG
+#cmakedefine HAVE_SSE
diff --git a/src/libipe.c b/src/libipe.c
index b0db74d..466056a 100644
--- a/src/libipe.c
+++ b/src/libipe.c
@@ -6,6 +6,7 @@
#include "libipe.h"
#include "libipe-private.h"
#include "config.h"
+#include <xmmintrin.h>
#define IPECAMERA_NUM_CHANNELS 16
#define IPECAMERA_PIXELS_PER_CHANNEL 128
@@ -84,6 +85,14 @@ static int ipe_decode_frame(uint16_t *pixel_buffer, uint32_t *raw, int num_rows,
uint32_t data;
const int bytes = 43;
+#ifdef HAVE_SSE
+ const uint32_t mask = 0x3FF;
+ __m128i mmask = _mm_set_epi32(mask, mask, mask, mask);
+ __m128i packed;
+ __m128i tmp1, tmp2;
+ uint32_t result[4] __attribute__ ((aligned (16))) = {0};
+#endif
+
do {
info = raw[0];
row = (info >> 4) & 0x7FF;
@@ -104,16 +113,51 @@ static int ipe_decode_frame(uint16_t *pixel_buffer, uint32_t *raw, int num_rows,
/* "Correct" missing pixel */
if ((row < 2) && (pixels == (IPECAMERA_PIXELS_PER_CHANNEL - 1))) {
pixel_buffer[base] = 0;
- base++;
- }
+ /* base++; */
+ }
#ifdef DEBUG
else
CHECK_FLAG("number of pixels, %i is expected", pixels == IPECAMERA_PIXELS_PER_CHANNEL, pixels, IPECAMERA_PIXELS_PER_CHANNEL);
#endif
- for (int i = 1; i < bytes; i++) {
+#ifdef HAVE_SSE
+ for (int i = 1 ; i < bytes-4; i += 4, base += 12) {
+ packed = _mm_set_epi32(raw[i], raw[i+1], raw[i+2], raw[i+3]);
+
+ tmp1 = _mm_srli_epi32(packed, 20);
+ tmp2 = _mm_and_si128(tmp1, mmask);
+ _mm_storeu_si128((__m128i*) result, tmp2);
+ pixel_buffer[base] = result[0];
+ pixel_buffer[base+3] = result[1];
+ pixel_buffer[base+6] = result[2];
+ pixel_buffer[base+9] = result[3];
+
+ tmp1 = _mm_srli_epi32(packed, 10);
+ tmp2 = _mm_and_si128(tmp1, mmask);
+ _mm_storeu_si128((__m128i*) result, tmp2);
+ pixel_buffer[base+1] = result[0];
+ pixel_buffer[base+4] = result[1];
+ pixel_buffer[base+7] = result[2];
+ pixel_buffer[base+10] = result[3];
+
+ tmp1 = _mm_and_si128(packed, mmask);
+ _mm_storeu_si128((__m128i*) result, tmp1);
+ pixel_buffer[base+2] = result[0];
+ pixel_buffer[base+5] = result[1];
+ pixel_buffer[base+8] = result[2];
+ pixel_buffer[base+11] = result[3];
+ }
+
+ /* Compute last pixels the usual way */
+ for (int i = bytes-4; i < bytes; i++) {
+ data = raw[i];
+ pixel_buffer[base++] = (data >> 20) & 0x3FF;
+ pixel_buffer[base++] = (data >> 10) & 0x3FF;
+ pixel_buffer[base++] = data & 0x3FF;
+ }
+#else
+ for (int i = 1 ; i < bytes; i++) {
data = raw[i];
-
#ifdef DEBUG
header = (data >> 30) & 0x03;
CHECK_FLAG("raw data magick", header == 3, header);
@@ -124,9 +168,9 @@ static int ipe_decode_frame(uint16_t *pixel_buffer, uint32_t *raw, int num_rows,
pixel_buffer[base++] = (data >> 10) & 0x3FF;
pixel_buffer[base++] = data & 0x3FF;
}
+#endif
data = raw[bytes];
-
#ifdef DEBUG
header = (data >> 30) & 0x03;
CHECK_FLAG("raw data magick", header == 3, header);