From c6db5ea47cdbedd3a17a17984b231e11476bfa97 Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Sun, 25 Nov 2012 08:04:11 +0100 Subject: Use memcpy implementation by Daniel Vik --- CMakeLists.txt | 2 +- fastwriter.c | 7 +- memcpy.c | 344 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ memcpy.h | 63 +++++++++++ 4 files changed, 412 insertions(+), 4 deletions(-) create mode 100644 memcpy.c create mode 100644 memcpy.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a95a93..e6d683e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,7 @@ include_directories( add_definitions("-fPIC --std=c99 -Wall -O2 -pthread") set(HEADERS fastwriter.h sysinfo.h default.h private.h) -add_library(fastwriter SHARED fastwriter.c sysinfo.c default.c) +add_library(fastwriter SHARED fastwriter.c sysinfo.c default.c memcpy.c) set_target_properties(fastwriter PROPERTIES VERSION ${FASTWRITER_VERSION} diff --git a/fastwriter.c b/fastwriter.c index e812681..c5bf301 100644 --- a/fastwriter.c +++ b/fastwriter.c @@ -19,6 +19,7 @@ #include "private.h" #include "default.h" #include "sysinfo.h" +#include "memcpy.h" fastwriter_t *fastwriter_init(const char *fs, fastwriter_flags_t flags) { fastwriter_t *ctx; @@ -275,11 +276,11 @@ int fastwriter_push(fastwriter_t *ctx, size_t size, const void *data) { if (part1 < size) { // tail < pos (we have checked for free space) end = size - part1; - memcpy(ctx->buffer + ctx->pos, data, part1); - memcpy(ctx->buffer, data + part1, end); + fast_memcpy(ctx->buffer + ctx->pos, data, part1); + fast_memcpy(ctx->buffer, data + part1, end); ctx->pos = end; } else { - memcpy(ctx->buffer + ctx->pos, data, size); + fast_memcpy(ctx->buffer + ctx->pos, data, size); ctx->pos += size; if (ctx->pos == ctx->size) ctx->pos = 0; diff --git a/memcpy.c b/memcpy.c new file mode 100644 index 0000000..5c29d01 --- /dev/null +++ b/memcpy.c @@ -0,0 +1,344 @@ +/******************************************************************** + ** File: memcpy.c + ** + ** 
Copyright (C) 1999-2010 Daniel Vik + ** + ** This software is provided 'as-is', without any express or implied + ** warranty. In no event will the authors be held liable for any + ** damages arising from the use of this software. + ** Permission is granted to anyone to use this software for any + ** purpose, including commercial applications, and to alter it and + ** redistribute it freely, subject to the following restrictions: + ** + ** 1. The origin of this software must not be misrepresented; you + ** must not claim that you wrote the original software. If you + ** use this software in a product, an acknowledgment in the + ** use this software in a product, an acknowledgment in the + ** product documentation would be appreciated but is not + ** required. + ** + ** 2. Altered source versions must be plainly marked as such, and + ** must not be misrepresented as being the original software. + ** + ** 3. This notice may not be removed or altered from any source + ** distribution. + ** + ** + ** Description: Implementation of the standard library function memcpy. + ** This implementation of memcpy() is ANSI-C89 compatible. + ** + ** The following configuration options can be set: + ** + ** LITTLE_ENDIAN - Uses processor with little endian + ** addressing. Default is big endian. + ** + ** PRE_INC_PTRS - Use pre increment of pointers. + ** Default is post increment of + ** pointers. + ** + ** INDEXED_COPY - Copying data using array indexing. + ** Using this option, disables the + ** PRE_INC_PTRS option. + ** + ** MEMCPY_64BIT - Compiles memcpy for 64 bit + ** architectures + ** + ** + ** Best Settings: + ** + ** Intel x86: LITTLE_ENDIAN and INDEXED_COPY + ** + *******************************************************************/ + + + +/******************************************************************** + ** Configuration definitions. 
+ *******************************************************************/ + +#define LITTLE_ENDIAN +#define INDEXED_COPY + + +/******************************************************************** + ** Includes for size_t definition + *******************************************************************/ + +#include <stddef.h> + + +/******************************************************************** + ** Typedefs + *******************************************************************/ + +typedef unsigned char UInt8; +typedef unsigned short UInt16; +typedef unsigned int UInt32; +#ifdef _WIN32 +typedef unsigned __int64 UInt64; +#else +typedef unsigned long long UInt64; +#endif + +#ifdef MEMCPY_64BIT +typedef UInt64 UIntN; +#define TYPE_WIDTH 8L +#else +typedef UInt32 UIntN; +#define TYPE_WIDTH 4L +#endif + + +/******************************************************************** + ** Remove definitions when INDEXED_COPY is defined. + *******************************************************************/ + +#if defined (INDEXED_COPY) +#if defined (PRE_INC_PTRS) +#undef PRE_INC_PTRS +#endif /*PRE_INC_PTRS*/ +#endif /*INDEXED_COPY*/ + + + +/******************************************************************** + ** Definitions for pre and post increment of pointers. 
+ *******************************************************************/ + +#if defined (PRE_INC_PTRS) + +#define START_VAL(x) (x)-- +#define INC_VAL(x) *++(x) +#define CAST_TO_U8(p, o) ((UInt8*)p + o + TYPE_WIDTH) +#define WHILE_DEST_BREAK (TYPE_WIDTH - 1) +#define PRE_LOOP_ADJUST - (TYPE_WIDTH - 1) +#define PRE_SWITCH_ADJUST + 1 + +#else /*PRE_INC_PTRS*/ + +#define START_VAL(x) +#define INC_VAL(x) *(x)++ +#define CAST_TO_U8(p, o) ((UInt8*)p + o) +#define WHILE_DEST_BREAK 0 +#define PRE_LOOP_ADJUST +#define PRE_SWITCH_ADJUST + +#endif /*PRE_INC_PTRS*/ + + + +/******************************************************************** + ** Definitions for endians + *******************************************************************/ + +#if defined (LITTLE_ENDIAN) + +#define SHL >> +#define SHR << + +#else /* LITTLE_ENDIAN */ + +#define SHL << +#define SHR >> + +#endif /* LITTLE_ENDIAN */ + + + +/******************************************************************** + ** Macros for copying words of different alignment. + ** Uses incrementing pointers. + *******************************************************************/ + +#define CP_INCR() { \ + INC_VAL(dstN) = INC_VAL(srcN); \ +} + +#define CP_INCR_SH(shl, shr) { \ + dstWord = srcWord SHL shl; \ + srcWord = INC_VAL(srcN); \ + dstWord |= srcWord SHR shr; \ + INC_VAL(dstN) = dstWord; \ +} + + + +/******************************************************************** + ** Macros for copying words of different alignment. + ** Uses array indexes. + *******************************************************************/ + +#define CP_INDEX(idx) { \ + dstN[idx] = srcN[idx]; \ +} + +#define CP_INDEX_SH(x, shl, shr) { \ + dstWord = srcWord SHL shl; \ + srcWord = srcN[x]; \ + dstWord |= srcWord SHR shr; \ + dstN[x] = dstWord; \ +} + + + +/******************************************************************** + ** Macros for copying words of different alignment. + ** Uses incrementing pointers or array indexes depending on + ** configuration. 
+ *******************************************************************/ + +#if defined (INDEXED_COPY) + +#define CP(idx) CP_INDEX(idx) +#define CP_SH(idx, shl, shr) CP_INDEX_SH(idx, shl, shr) + +#define INC_INDEX(p, o) ((p) += (o)) + +#else /* INDEXED_COPY */ + +#define CP(idx) CP_INCR() +#define CP_SH(idx, shl, shr) CP_INCR_SH(shl, shr) + +#define INC_INDEX(p, o) + +#endif /* INDEXED_COPY */ + + +#define COPY_REMAINING(count) { \ + START_VAL(dst8); \ + START_VAL(src8); \ + \ + switch (count) { \ + case 7: INC_VAL(dst8) = INC_VAL(src8); \ + case 6: INC_VAL(dst8) = INC_VAL(src8); \ + case 5: INC_VAL(dst8) = INC_VAL(src8); \ + case 4: INC_VAL(dst8) = INC_VAL(src8); \ + case 3: INC_VAL(dst8) = INC_VAL(src8); \ + case 2: INC_VAL(dst8) = INC_VAL(src8); \ + case 1: INC_VAL(dst8) = INC_VAL(src8); \ + case 0: \ + default: break; \ + } \ +} + +#define COPY_NO_SHIFT() { \ + UIntN* dstN = (UIntN*)(dst8 PRE_LOOP_ADJUST); \ + UIntN* srcN = (UIntN*)(src8 PRE_LOOP_ADJUST); \ + size_t length = count / TYPE_WIDTH; \ + \ + while (length & 7) { \ + CP_INCR(); \ + length--; \ + } \ + \ + length /= 8; \ + \ + while (length--) { \ + CP(0); \ + CP(1); \ + CP(2); \ + CP(3); \ + CP(4); \ + CP(5); \ + CP(6); \ + CP(7); \ + \ + INC_INDEX(dstN, 8); \ + INC_INDEX(srcN, 8); \ + } \ + \ + src8 = CAST_TO_U8(srcN, 0); \ + dst8 = CAST_TO_U8(dstN, 0); \ + \ + COPY_REMAINING(count & (TYPE_WIDTH - 1)); \ + \ + return dest; \ +} + + + +#define COPY_SHIFT(shift) { \ + UIntN* dstN = (UIntN*)((((UIntN)dst8) PRE_LOOP_ADJUST) & \ + ~(TYPE_WIDTH - 1)); \ + UIntN* srcN = (UIntN*)((((UIntN)src8) PRE_LOOP_ADJUST) & \ + ~(TYPE_WIDTH - 1)); \ + size_t length = count / TYPE_WIDTH; \ + UIntN srcWord = INC_VAL(srcN); \ + UIntN dstWord; \ + \ + while (length & 7) { \ + CP_INCR_SH(8 * shift, 8 * (TYPE_WIDTH - shift)); \ + length--; \ + } \ + \ + length /= 8; \ + \ + while (length--) { \ + CP_SH(0, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + CP_SH(1, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + CP_SH(2, 8 * shift, 8 * 
(TYPE_WIDTH - shift)); \ + CP_SH(3, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + CP_SH(4, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + CP_SH(5, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + CP_SH(6, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + CP_SH(7, 8 * shift, 8 * (TYPE_WIDTH - shift)); \ + \ + INC_INDEX(dstN, 8); \ + INC_INDEX(srcN, 8); \ + } \ + \ + src8 = CAST_TO_U8(srcN, (shift - TYPE_WIDTH)); \ + dst8 = CAST_TO_U8(dstN, 0); \ + \ + COPY_REMAINING(count & (TYPE_WIDTH - 1)); \ + \ + return dest; \ +} + + +/******************************************************************** + ** + ** void *fast_memcpy(void *dest, const void *src, size_t count) + ** + ** Args: dest - pointer to destination buffer + ** src - pointer to source buffer + ** count - number of bytes to copy + ** + ** Return: A pointer to destination buffer + ** + ** Purpose: Copies count bytes from src to dest. + ** No overlap check is performed. + ** + *******************************************************************/ + +void *fast_memcpy(void *dest, const void *src, size_t count) +{ + UInt8* dst8 = (UInt8*)dest; + UInt8* src8 = (UInt8*)src; + + if (count < 8) { + COPY_REMAINING(count); + return dest; + } + + START_VAL(dst8); + START_VAL(src8); + + while (((UIntN)dst8 & (TYPE_WIDTH - 1)) != WHILE_DEST_BREAK) { + INC_VAL(dst8) = INC_VAL(src8); + count--; + } + + switch ((((UIntN)src8) PRE_SWITCH_ADJUST) & (TYPE_WIDTH - 1)) { + case 0: COPY_NO_SHIFT(); break; + case 1: COPY_SHIFT(1); break; + case 2: COPY_SHIFT(2); break; + case 3: COPY_SHIFT(3); break; +#if TYPE_WIDTH > 4 + case 4: COPY_SHIFT(4); break; + case 5: COPY_SHIFT(5); break; + case 6: COPY_SHIFT(6); break; + case 7: COPY_SHIFT(7); break; +#endif + } +} diff --git a/memcpy.h b/memcpy.h new file mode 100644 index 0000000..0714823 --- /dev/null +++ b/memcpy.h @@ -0,0 +1,63 @@ +/******************************************************************** + ** File: memcpy.h + ** + ** Copyright (C) 2005 Daniel Vik + ** + ** This software is provided 'as-is', without 
any express or implied + ** warranty. In no event will the authors be held liable for any + ** damages arising from the use of this software. + ** Permission is granted to anyone to use this software for any + ** purpose, including commercial applications, and to alter it and + ** redistribute it freely, subject to the following restrictions: + ** + ** 1. The origin of this software must not be misrepresented; you + ** must not claim that you wrote the original software. If you + ** use this software in a product, an acknowledgment in the + ** use this software in a product, an acknowledgment in the + ** product documentation would be appreciated but is not + ** required. + ** + ** 2. Altered source versions must be plainly marked as such, and + ** must not be misrepresented as being the original software. + ** + ** 3. This notice may not be removed or altered from any source + ** distribution. + ** + ** + ** Description: Implementation of the standard library function memcpy. + ** This implementation of memcpy() is ANSI-C89 compatible. + ** + *******************************************************************/ + + +/******************************************************************** + ** Includes for size_t definition + *******************************************************************/ + +#include <stddef.h> + + +/******************************************************************** + ** + ** void *fast_memcpy(void *dest, const void *src, size_t count) + ** + ** Args: dest - pointer to destination buffer + ** src - pointer to source buffer + ** count - number of bytes to copy + ** + ** Return: A pointer to destination buffer + ** + ** Purpose: Copies count bytes from src to dest. No overlap check + ** is performed. 
+ ** + *******************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif + +void *fast_memcpy(void *dest, const void *src, size_t count); + +#ifdef __cplusplus +} +#endif -- cgit v1.2.3 From 83ebc0af67cf2d4cd5a45fa253dac88f1cb3b1e4 Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Tue, 27 Nov 2012 02:23:18 +0100 Subject: Seems new memcpy is only good for ipepdvcompute2, make it optional and disabled by default --- CMakeLists.txt | 12 +++++++++--- config.h.in | 3 ++- fastwriter.c | 9 +++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e6d683e..66d5cbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ set(FASTWRITER_ABI_VERSION "0") cmake_minimum_required(VERSION 2.8) set(DISABLE_XFS_REALTIME FALSE CACHE BOOL "Disable support of RealTime XFS partition") - +set(USE_CUSTOM_MEMCPY FALSE CACHE BOOL "Use custom memcpy routine instead of stanadrd") include(CheckIncludeFiles) check_include_files("linux/falloc.h" HAVE_LINUX_FALLOC_H) @@ -25,8 +25,14 @@ include_directories( add_definitions("-fPIC --std=c99 -Wall -O2 -pthread") -set(HEADERS fastwriter.h sysinfo.h default.h private.h) -add_library(fastwriter SHARED fastwriter.c sysinfo.c default.c memcpy.c) +if (USE_CUSTOM_MEMCPY) + set(HEADERS fastwriter.h sysinfo.h default.h private.h memcpy.h) + add_library(fastwriter SHARED fastwriter.c sysinfo.c default.c memcpy.c) +else (USE_CUSTOM_MEMCPY) + set(HEADERS fastwriter.h sysinfo.h default.h private.h) + add_library(fastwriter SHARED fastwriter.c sysinfo.c default.c) +endif (USE_CUSTOM_MEMCPY) + set_target_properties(fastwriter PROPERTIES VERSION ${FASTWRITER_VERSION} diff --git a/config.h.in b/config.h.in index 3627160..475acc5 100644 --- a/config.h.in +++ b/config.h.in @@ -1,2 +1,3 @@ #cmakedefine HAVE_LINUX_FALLOC_H -#cmakedefine DISABLE_XFS_REALTIME \ No newline at end of file +#cmakedefine DISABLE_XFS_REALTIME +#cmakedefine USE_CUSTOM_MEMCPY 
diff --git a/fastwriter.c b/fastwriter.c index c5bf301..08722fa 100644 --- a/fastwriter.c +++ b/fastwriter.c @@ -15,11 +15,16 @@ #include - #include "private.h" #include "default.h" #include "sysinfo.h" -#include "memcpy.h" + +#ifdef USE_CUSTOM_MEMCPY +# include "memcpy.h" +#else /* USE_CUSTOM_MEMCPY */ +# define fast_memcpy memcpy +#endif /* USE_CUSTOM_MEMCPY */ + fastwriter_t *fastwriter_init(const char *fs, fastwriter_flags_t flags) { fastwriter_t *ctx; -- cgit v1.2.3