summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt2
-rw-r--r--dma/nwl_private.h2
-rw-r--r--pcilib/CMakeLists.txt6
-rw-r--r--pcilib/cpu.c115
-rw-r--r--pcilib/cpu.h17
-rw-r--r--pcilib/datacpy.c90
-rw-r--r--pcilib/datacpy.h21
-rw-r--r--pcilib/dma.c6
-rw-r--r--pcilib/memcpy.c66
-rw-r--r--pcilib/memcpy.h22
-rw-r--r--pcilib/pagecpy.c153
-rw-r--r--pcilib/pagecpy.h29
-rw-r--r--pcilib/pci.h3
-rw-r--r--pcilib/pcilib.h2
-rw-r--r--pcilib/py.c2
-rw-r--r--pcilib/register.c1
-rw-r--r--pcilib/timing.c89
-rw-r--r--pcilib/timing.h25
-rw-r--r--pcilib/tools.c249
-rw-r--r--pcilib/tools.h21
-rw-r--r--pcitool/cli.c4
-rw-r--r--protocols/default.c1
-rw-r--r--protocols/software.c43
23 files changed, 674 insertions, 295 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0a938a3..1db3de0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
project(pcitool)
-set(PCILIB_VERSION "0.2.3")
+set(PCILIB_VERSION "0.2.4")
set(PCILIB_ABI_VERSION "2")
cmake_minimum_required(VERSION 2.6)
diff --git a/dma/nwl_private.h b/dma/nwl_private.h
index 08f0feb..d263f79 100644
--- a/dma/nwl_private.h
+++ b/dma/nwl_private.h
@@ -17,6 +17,8 @@ typedef struct pcilib_nwl_engine_context_s pcilib_nwl_engine_context_t;
#define PCILIB_NWL_REGISTER_TIMEOUT 10000 /**< us */
+#include "datacpy.h"
+
#include "nwl.h"
#include "nwl_irq.h"
#include "nwl_engine.h"
diff --git a/pcilib/CMakeLists.txt b/pcilib/CMakeLists.txt
index 5d84ddc..cdc9c3f 100644
--- a/pcilib/CMakeLists.txt
+++ b/pcilib/CMakeLists.txt
@@ -8,8 +8,8 @@ include_directories(
${UTHASH_INCLUDE_DIRS}
)
-set(HEADERS pcilib.h pci.h export.h value.h bar.h fifo.h model.h bank.h register.h view.h property.h unit.h xml.h py.h kmem.h irq.h locking.h lock.h dma.h event.h plugin.h tools.h error.h debug.h env.h version.h config.h)
-add_library(pcilib SHARED pci.c export.c value.c bar.c fifo.c model.c bank.c register.c view.c unit.c property.c xml.c py.c kmem.c irq.c locking.c lock.c dma.c event.c plugin.c tools.c error.c debug.c env.c )
+set(HEADERS pcilib.h pci.h datacpy.h memcpy.h pagecpy.h cpu.h timing.h export.h value.h bar.h fifo.h model.h bank.h register.h view.h property.h unit.h xml.h py.h kmem.h irq.h locking.h lock.h dma.h event.h plugin.h tools.h error.h debug.h env.h version.h config.h)
+add_library(pcilib SHARED pci.c datacpy.c memcpy.c pagecpy.c cpu.c timing.c export.c value.c bar.c fifo.c model.c bank.c register.c view.c unit.c property.c xml.c py.c kmem.c irq.c locking.c lock.c dma.c event.c plugin.c tools.c error.c debug.c env.c )
target_link_libraries(pcilib dma protocols views ${CMAKE_THREAD_LIBS_INIT} ${UFODECODE_LIBRARIES} ${CMAKE_DL_LIBS} ${EXTRA_SYSTEM_LIBS} ${LIBXML2_LIBRARIES} ${PYTHON_LIBRARIES})
add_dependencies(pcilib dma protocols views)
@@ -21,6 +21,6 @@ install(FILES pcilib.h
DESTINATION include
)
-install(FILES bar.h kmem.h locking.h lock.h bank.h register.h xml.h dma.h event.h model.h error.h debug.h env.h tools.h export.h version.h view.h unit.h
+install(FILES bar.h kmem.h locking.h lock.h bank.h register.h xml.h dma.h event.h model.h error.h debug.h env.h tools.h timing.h cpu.h datacpy.h pagecpy.h memcpy.h export.h version.h view.h unit.h
DESTINATION include/pcilib
)
diff --git a/pcilib/cpu.c b/pcilib/cpu.c
new file mode 100644
index 0000000..7fc86f5
--- /dev/null
+++ b/pcilib/cpu.c
@@ -0,0 +1,115 @@
+#define _POSIX_C_SOURCE 200112L
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <assert.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+
+#include "pci.h"
+#include "tools.h"
+#include "error.h"
+
+/**
+ * Executes the CPUID instruction and stores the resulting register values.
+ * @param[in] eax - value loaded into EAX before CPUID is executed
+ * @param[in] ecx - value loaded into ECX before CPUID is executed
+ * @param[out] abcd - receives four values: abcd[0] = EAX, abcd[1] = EBX, abcd[2] = ECX, abcd[3] = EDX
+ */
+static void pcilib_run_cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd) {
+ uint32_t ebx = 0, edx;
+ /* The two preprocessor branches only supply the opening of the asm statement; the operand list after #endif is shared */
+# if defined( __i386__ ) && defined ( __PIC__ )
+ /* in case of PIC under 32-bit EBX cannot be clobbered */
+ __asm__ ( "movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D" (ebx),
+# else
+ __asm__ ( "cpuid" : "+b" (ebx),
+# endif
+ "+a" (eax), "+c" (ecx), "=d" (edx) );
+ abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx;
+}
+
+/**
+ * Executes XGETBV with ECX = 0 and tests bits 1 and 2 of the value returned in EAX.
+ * @return 1 if both bits are set (xmm and ymm state enabled in XCR0), 0 otherwise
+ */
+static int pcilib_check_xcr0_ymm() {
+    const uint32_t xmm_ymm_state = 6;
+    uint32_t low;
+
+    /* EDX receives the upper half of the result and is not used, hence the clobber */
+    __asm__ ("xgetbv" : "=a" (low) : "c" (0) : "%edx" );
+
+    if ((low & xmm_ymm_state) != xmm_ymm_state)
+        return 0;
+    return 1;
+}
+
+/**
+ * Checks the CPUID feature bits and the XCR0 state tested by this library
+ * before it selects its AVX code path.
+ * @return 1 if every tested feature is reported, 0 as soon as one is missing
+ */
+static int pcilib_check_4th_gen_intel_core_features() {
+    /* CPUID.(EAX=01H, ECX=0H):ECX - FMA[bit 12], MOVBE[bit 22], OSXSAVE[bit 27] */
+    const uint32_t leaf1_ecx_required = ((1 << 12) | (1 << 22) | (1 << 27));
+    /* CPUID.(EAX=07H, ECX=0H):EBX - BMI1[bit 3], AVX2[bit 5], BMI2[bit 8] */
+    const uint32_t leaf7_ebx_required = (1 << 5) | (1 << 3) | (1 << 8);
+    /* CPUID.(EAX=80000001H):ECX - LZCNT[bit 5] */
+    const uint32_t ext_ecx_lzcnt = (1 << 5);
+    uint32_t regs[4];
+
+    pcilib_run_cpuid(1, 0, regs);
+    if ((regs[2] & leaf1_ecx_required) != leaf1_ecx_required)
+        return 0;
+
+    /* XGETBV is executed only after the OSXSAVE bit has been confirmed above */
+    if (!pcilib_check_xcr0_ymm())
+        return 0;
+
+    pcilib_run_cpuid(7, 0, regs);
+    if ((regs[1] & leaf7_ebx_required) != leaf7_ebx_required)
+        return 0;
+
+    pcilib_run_cpuid(0x80000001, 0, regs);
+    return (regs[2] & ext_ecx_lzcnt) ? 1 : 0;
+}
+
+/**
+ * Maps the detected feature set to a generation number.
+ * @return 4 if pcilib_check_4th_gen_intel_core_features() succeeds, 0 otherwise
+ */
+static int pcilib_detect_cpu_gen() {
+    return pcilib_check_4th_gen_intel_core_features() ? 4 : 0;
+}
+
+/**
+ * Returns the CPU generation reported by pcilib_detect_cpu_gen().
+ * The detection runs on the first call only; the result is kept in a
+ * function-local static, so later calls do not execute CPUID/XGETBV again.
+ * Concurrent first calls may each run the detection; they store the same value.
+ * @return 4 if the tested feature set is present, 0 otherwise
+ */
+int pcilib_get_cpu_gen() {
+    /* static: without it the "gen < 0" test below is always true and nothing is cached */
+    static int gen = -1;
+
+    if (gen < 0)
+        gen = pcilib_detect_cpu_gen();
+
+    return gen;
+}
+
+/**
+ * Computes a mask with one bit set for every time the page size can be
+ * halved before reaching 1 (i.e. page size - 1 for a power-of-two page size).
+ * If sysconf(_SC_PAGESIZE) fails or reports a non-positive value, a page size
+ * of 4096 bytes is assumed instead of looping forever on the invalid value.
+ * @return the page mask
+ */
+int pcilib_get_page_mask() {
+    long pagesize = sysconf(_SC_PAGESIZE);
+    int pagemask = 0;
+
+    /* sysconf() returns -1 on error; shifting -1 or 0 right never reaches 1 */
+    if (pagesize < 1)
+        pagesize = 4096;
+
+    for (; pagesize > 1; pagesize >>= 1)
+        pagemask = (pagemask << 1) + 1;
+
+    return pagemask;
+}
+
+/**
+ * Counts the CPUs present in the affinity mask of the calling process.
+ * @return the number of CPUs in the mask; 1 if sched_getaffinity() fails;
+ * PCILIB_DEFAULT_CPU_COUNT if the mask turns out to be empty
+ */
+int pcilib_get_cpu_count() {
+    int cpu_count = 0;
+    cpu_set_t mask;
+
+    if (sched_getaffinity(getpid(), sizeof(mask), &mask))
+        return 1;
+
+#ifdef CPU_COUNT
+    cpu_count = CPU_COUNT(&mask);
+#else
+    {
+        /* Count every set CPU; stopping at the first unset one would miss sparse masks */
+        int cpu;
+        for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+            if (CPU_ISSET(cpu, &mask)) ++cpu_count;
+        }
+    }
+#endif
+
+    if (!cpu_count) cpu_count = PCILIB_DEFAULT_CPU_COUNT;
+    return cpu_count;
+}
+
diff --git a/pcilib/cpu.h b/pcilib/cpu.h
new file mode 100644
index 0000000..2b3ed80
--- /dev/null
+++ b/pcilib/cpu.h
@@ -0,0 +1,17 @@
+#ifndef _PCILIB_CPU_H
+#define _PCILIB_CPU_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Returns a bit mask derived from sysconf(_SC_PAGESIZE): one low bit per halving of the page size down to 1 */
+int pcilib_get_page_mask();
+/** Returns the number of CPUs in the affinity mask of the calling process (1 if the mask cannot be read) */
+int pcilib_get_cpu_count();
+/** Returns 4 if the CPU passes the feature checks implemented in cpu.c, 0 otherwise */
+int pcilib_get_cpu_gen();
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _PCILIB_CPU_H */
diff --git a/pcilib/datacpy.c b/pcilib/datacpy.c
new file mode 100644
index 0000000..15dfbe9
--- /dev/null
+++ b/pcilib/datacpy.c
@@ -0,0 +1,90 @@
+#define _POSIX_C_SOURCE 200112L
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <assert.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+
+#include "pci.h"
+#include "tools.h"
+#include "error.h"
+
+/**
+ * Copies n 32-bit words from src to dst, passing each word through ntohl()
+ * when the requested endianess calls for a swap on this host.
+ * @param[out] dst - destination buffer
+ * @param[in] src - source buffer
+ * @param[in] n - number of 32-bit words (not bytes) to copy
+ * @param[in] endianess - a zero value disables swapping; PCILIB_BIG_ENDIAN swaps when ntohs(1) != 1, any other non-zero value swaps when ntohs(1) == 1
+ * @return dst
+ */
+void *pcilib_datacpy32(void * dst, void const * src, size_t n, pcilib_endianess_t endianess) {
+    uint32_t *out = (uint32_t *) dst;
+    uint32_t const *in = (uint32_t const *) src;
+    size_t i;
+    int swap = 0;
+
+    if (endianess) {
+        int host_is_big = (ntohs(1) == 1);
+        swap = (endianess == PCILIB_BIG_ENDIAN) ? !host_is_big : host_is_big;
+    }
+
+    if (swap) {
+        for (i = 0; i < n; i++)
+            out[i] = ntohl(in[i]);
+    } else {
+        for (i = 0; i < n; i++)
+            out[i] = in[i];
+    }
+
+    return dst;
+}
+
+/**
+ * Copies n 64-bit words from src to dst, converting each word to host byte
+ * order when the requested endianess differs from the host.
+ * The previous implementation converted with ntohl(), which operates on
+ * 32-bit values and therefore discarded the upper half of every word.
+ * @param[out] dst - destination buffer
+ * @param[in] src - source buffer
+ * @param[in] n - number of 64-bit words (not bytes) to copy
+ * @param[in] endianess - a zero value disables swapping; otherwise the byte order of the data
+ * @return dst
+ */
+void *pcilib_datacpy64(void * dst, void const * src, size_t n, pcilib_endianess_t endianess) {
+    uint64_t * plDst = (uint64_t *) dst;
+    uint64_t const * plSrc = (uint64_t const *) src;
+
+    int swap = 0;
+
+    if (endianess)
+        swap = (endianess == PCILIB_BIG_ENDIAN)?(be64toh(1)!=1):(be64toh(1)==1);
+
+    if (swap) {
+        /* swap is only set when the data order differs from the host order, so the matching *toh() call performs a full 64-bit byte swap */
+        if (endianess == PCILIB_BIG_ENDIAN) {
+            while (n > 0) {
+                *plDst = be64toh(*plSrc);
+                ++plSrc;
+                ++plDst;
+                --n;
+            }
+        } else {
+            while (n > 0) {
+                *plDst = le64toh(*plSrc);
+                ++plSrc;
+                ++plDst;
+                --n;
+            }
+        }
+    } else {
+        while (n > 0) {
+            *plDst = *plSrc;
+            ++plSrc;
+            ++plDst;
+            --n;
+        }
+    }
+
+    return dst;
+}
+
+typedef void* (*pcilib_datacpy_routine_t)(void * dst, void const * src, size_t n, pcilib_endianess_t endianess);
+
+/* Copy routines indexed by log2 of the element size in bytes; only 4- and 8-byte elements are implemented */
+static pcilib_datacpy_routine_t pcilib_datacpy_routines[4] = {
+    NULL, NULL, pcilib_datacpy32, pcilib_datacpy64
+};
+
+/**
+ * Copies n elements of the given size, dispatching to the routine for that size.
+ * Sizes without an implementation (below 4 bytes, or 16 bytes and above) used to
+ * call a NULL pointer or index past the end of the table; they now trip an
+ * assertion and, when assertions are disabled, return NULL without copying.
+ * @param[out] dst - destination buffer
+ * @param[in] src - source buffer
+ * @param[in] size - element size in bytes; the routine is selected by floor(log2(size))
+ * @param[in] n - number of elements to copy
+ * @param[in] endianess - forwarded unchanged to the selected routine
+ * @return dst, or NULL if the element size is not supported
+ */
+void *pcilib_datacpy(void * dst, void const * src, uint8_t size, size_t n, pcilib_endianess_t endianess) {
+    size_t pos = 0;
+    pcilib_datacpy_routine_t routine = NULL;
+
+    while (size >>= 1) ++pos;
+
+    if (pos < sizeof(pcilib_datacpy_routines) / sizeof(pcilib_datacpy_routines[0]))
+        routine = pcilib_datacpy_routines[pos];
+
+    assert(routine);
+    if (!routine) return NULL;
+
+    return routine(dst, src, n, endianess);
+}
diff --git a/pcilib/datacpy.h b/pcilib/datacpy.h
new file mode 100644
index 0000000..1ce2e79
--- /dev/null
+++ b/pcilib/datacpy.h
@@ -0,0 +1,21 @@
+#ifndef _PCILIB_DATACPY_H
+#define _PCILIB_DATACPY_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include <pcilib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Copies n 32-bit words from src to dst, byte-swapping when the given endianess requires it on this host; returns dst */
+void *pcilib_datacpy32(void * dst, void const * src, size_t n, pcilib_endianess_t endianess);
+/** Copies n 64-bit words from src to dst; same calling convention as pcilib_datacpy32; returns dst */
+void *pcilib_datacpy64(void * dst, void const * src, size_t n, pcilib_endianess_t endianess);
+/** Copies n elements of `size` bytes each by dispatching to the routine for that element size */
+void *pcilib_datacpy(void * dst, void const * src, uint8_t size, size_t n, pcilib_endianess_t endianess);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PCILIB_DATACPY_H */
diff --git a/pcilib/dma.c b/pcilib/dma.c
index be02c21..6191047 100644
--- a/pcilib/dma.c
+++ b/pcilib/dma.c
@@ -17,6 +17,8 @@
#include "pcilib.h"
#include "pci.h"
#include "dma.h"
+#include "tools.h"
+#include "pagecpy.h"
const pcilib_dma_description_t *pcilib_get_dma_description(pcilib_t *ctx) {
int err;
@@ -194,8 +196,8 @@ static int pcilib_dma_read_callback(void *arg, pcilib_dma_flags_t flags, size_t
pcilib_error("Buffer size (%li) is not large enough for DMA packet, at least %li bytes is required", ctx->size, ctx->pos + bufsize);
return -PCILIB_ERROR_TOOBIG;
}
-
- memcpy(ctx->data + ctx->pos, buf, bufsize);
+
+ pcilib_pagecpy(ctx->data + ctx->pos, buf, bufsize);
ctx->pos += bufsize;
if (flags & PCILIB_DMA_FLAG_EOP) {
diff --git a/pcilib/memcpy.c b/pcilib/memcpy.c
new file mode 100644
index 0000000..149d1fd
--- /dev/null
+++ b/pcilib/memcpy.c
@@ -0,0 +1,66 @@
+#define _POSIX_C_SOURCE 200112L
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <assert.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+
+#include "pci.h"
+#include "tools.h"
+#include "error.h"
+
+/**
+ * Copies len bytes from src to dst one byte at a time, in ascending address order.
+ * @param[out] dst - destination buffer
+ * @param[in] src - source buffer
+ * @param[in] len - number of bytes to copy
+ * @return dst
+ */
+void *pcilib_memcpy8(void * dst, void const * src, size_t len) {
+    char *out = (char *) dst;
+    char const *in = (char const *) src;
+    size_t i;   /* same type as len: an int index overflows for len > INT_MAX */
+
+    for (i = 0; i < len; i++) out[i] = in[i];
+    return dst;
+}
+
+/**
+ * Copies len bytes from src to dst using 32-bit accesses for as long as at
+ * least 4 bytes remain, then finishes the tail byte by byte.
+ * @param[out] dst - destination buffer
+ * @param[in] src - source buffer
+ * @param[in] len - number of bytes to copy
+ * @return dst
+ */
+void *pcilib_memcpy32(void * dst, void const * src, size_t len) {
+    uint32_t *out32 = (uint32_t *) dst;
+    uint32_t const *in32 = (uint32_t const *) src;
+    char *out8;
+    char const *in8;
+
+    for (; len >= 4; len -= 4) {
+        *out32 = *in32;
+        ++in32;
+        ++out32;
+    }
+
+    /* fewer than 4 bytes are left at this point */
+    out8 = (char *) out32;
+    in8 = (char const *) in32;
+    for (; len > 0; --len)
+        *out8++ = *in8++;
+
+    return dst;
+}
+
+
+/**
+ * Copies len bytes from src to dst using 64-bit accesses for as long as at
+ * least 8 bytes remain, then finishes the tail byte by byte.
+ * @param[out] dst - destination buffer
+ * @param[in] src - source buffer
+ * @param[in] len - number of bytes to copy
+ * @return dst
+ */
+void *pcilib_memcpy64(void * dst, void const * src, size_t len) {
+    uint64_t *out64 = (uint64_t *) dst;
+    uint64_t const *in64 = (uint64_t const *) src;
+    char *out8;
+    char const *in8;
+
+    for (; len >= 8; len -= 8)
+        *out64++ = *in64++;
+
+    /* fewer than 8 bytes are left at this point */
+    out8 = (char *) out64;
+    in8 = (char const *) in64;
+    for (; len > 0; --len)
+        *out8++ = *in8++;
+
+    return dst;
+}
+
diff --git a/pcilib/memcpy.h b/pcilib/memcpy.h
new file mode 100644
index 0000000..3ac2115
--- /dev/null
+++ b/pcilib/memcpy.h
@@ -0,0 +1,22 @@
+#ifndef _PCILIB_MEMCPY_H
+#define _PCILIB_MEMCPY_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+
+#define pcilib_memcpy pcilib_memcpy32
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Copies len bytes from src to dst one byte at a time; returns dst */
+void *pcilib_memcpy8(void * dst, void const * src, size_t len);
+/** Copies len bytes from src to dst in 32-bit words, followed by a byte-wise tail; returns dst */
+void *pcilib_memcpy32(void * dst, void const * src, size_t len);
+/** Copies len bytes from src to dst in 64-bit words, followed by a byte-wise tail; returns dst */
+void *pcilib_memcpy64(void * dst, void const * src, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PCILIB_MEMCPY_H */
diff --git a/pcilib/pagecpy.c b/pcilib/pagecpy.c
new file mode 100644
index 0000000..f474f9f
--- /dev/null
+++ b/pcilib/pagecpy.c
@@ -0,0 +1,153 @@
+#define _POSIX_C_SOURCE 200112L
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <assert.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+
+#include "cpu.h"
+#include "pci.h"
+#include "tools.h"
+#include "error.h"
+
+
+/*
+void *memcpy128(void * dst, void const * src, size_t len) {
+
+ long pos = - (len>>2);
+ char * plDst = (char *) dst - 4 * pos;
+ char const * plSrc = (char const *) src - 4 * pos;
+
+ if (pos) {
+ __asm__ __volatile__ (
+ "1: \n\t"
+ "mov (%0,%2,4), %%edi \n\t"
+ "mov %%edi, (%1,%2,4) \n\t"
+ "inc %2 \n\t"
+ "jnz 1b \n\t"
+ :
+ : "r" (plSrc), "r" (plDst), "r" (pos)
+ : "%edi"
+ );
+ }
+
+
+
+ long pos = - ((len>>4)<<4);
+ char * plDst = (char *) dst - pos;
+ char const * plSrc = (char const *) src - pos;
+
+ if (pos) {
+ __asm__ __volatile__ (
+ "1: \n\t"
+// "movdqa (%0,%2), %%xmm0 \n\t"
+ "mov (%0,%2), %%esi \n\t"
+ "movd %%esi, %%xmm0 \n\t"
+ "mov 4(%0,%2), %%esi \n\t"
+ "movd %%esi, %%xmm1 \n\t"
+ "mov 8(%0,%2), %%esi \n\t"
+ "movd %%esi, %%xmm2 \n\t"
+ "mov 12(%0,%2), %%esi \n\t"
+ "movd %%esi, %%xmm3 \n\t"
+ "pslldq $4, %%xmm1 \n\t"
+ "por %%xmm1, %%xmm0 \n\t"
+ "pslldq $8, %%xmm2 \n\t"
+ "por %%xmm2, %%xmm0 \n\t"
+ "pslldq $12, %%xmm3 \n\t"
+ "por %%xmm3, %%xmm0 \n\t"
+
+ "movntdq %%xmm0, (%1,%2) \n\t"
+ "add $16, %2 \n\t"
+ "jnz 1b \n\t"
+ :
+ : "r" (plSrc), "r" (plDst), "r" (pos)
+ : "%rsi"
+ );
+ }
+
+
+
+ len &= 0x3;
+
+ char * pcDst = (char *) plDst;
+ char const * pcSrc = (char const *) plSrc;
+
+ while (len--) {
+ *pcDst++ = *pcSrc++;
+ }
+
+ return (dst);
+}
+*/
+
+/**
+ * Copies `size` bytes from src to dst in 512-byte blocks: sixteen 32-byte
+ * vmovdqa loads from src followed by sixteen vmovntps stores to dst per block.
+ * Bytes beyond the last complete 512-byte block are not copied, and nothing is
+ * copied when size is below 512. The only caller in this file, pcilib_pagecpy(),
+ * invokes it for sizes that are multiples of 4096 with both pointers 32-byte aligned.
+ *
+ * Changes from the previous version:
+ *  - the operands were bound as %0 = dst, %1 = src while the code loaded from
+ *    %0 and stored to %1, i.e. the data was copied from dst into src;
+ *  - the block counter is now a read-write operand instead of being saved with
+ *    push/pop, which moved the stack pointer behind the compiler's back;
+ *  - memory, flags and the vector registers are declared as clobbered;
+ *  - a zero block count returns early instead of wrapping the counter.
+ *
+ * @param[out] dst - destination, expected to be 32-byte aligned
+ * @param[in] src - source, expected to be 32-byte aligned
+ * @param[in] size - number of bytes; only complete 512-byte blocks are copied
+ */
+void pcilib_memcpy4k_avx(void *dst, void *src, size_t size) {
+    size_t blocks = size / 512;
+    size_t offset = 0;
+
+    if (!blocks) return;
+
+    __asm__ __volatile__ (
+        "1: \n\t"
+
+        "vmovdqa (%[src],%[off]), %%ymm0 \n\t"
+        "vmovdqa 32(%[src],%[off]), %%ymm1 \n\t"
+        "vmovdqa 64(%[src],%[off]), %%ymm2 \n\t"
+        "vmovdqa 96(%[src],%[off]), %%ymm3 \n\t"
+        "vmovdqa 128(%[src],%[off]), %%ymm4 \n\t"
+        "vmovdqa 160(%[src],%[off]), %%ymm5 \n\t"
+        "vmovdqa 192(%[src],%[off]), %%ymm6 \n\t"
+        "vmovdqa 224(%[src],%[off]), %%ymm7 \n\t"
+
+        "vmovdqa 256(%[src],%[off]), %%ymm8 \n\t"
+        "vmovdqa 288(%[src],%[off]), %%ymm9 \n\t"
+        "vmovdqa 320(%[src],%[off]), %%ymm10 \n\t"
+        "vmovdqa 352(%[src],%[off]), %%ymm11 \n\t"
+        "vmovdqa 384(%[src],%[off]), %%ymm12 \n\t"
+        "vmovdqa 416(%[src],%[off]), %%ymm13 \n\t"
+        "vmovdqa 448(%[src],%[off]), %%ymm14 \n\t"
+        "vmovdqa 480(%[src],%[off]), %%ymm15 \n\t"
+
+        "vmovntps %%ymm0, (%[dst],%[off]) \n\t"
+        "vmovntps %%ymm1, 32(%[dst],%[off]) \n\t"
+        "vmovntps %%ymm2, 64(%[dst],%[off]) \n\t"
+        "vmovntps %%ymm3, 96(%[dst],%[off]) \n\t"
+        "vmovntps %%ymm4, 128(%[dst],%[off]) \n\t"
+        "vmovntps %%ymm5, 160(%[dst],%[off]) \n\t"
+        "vmovntps %%ymm6, 192(%[dst],%[off]) \n\t"
+        "vmovntps %%ymm7, 224(%[dst],%[off]) \n\t"
+
+        "vmovntps %%ymm8, 256(%[dst],%[off]) \n\t"
+        "vmovntps %%ymm9, 288(%[dst],%[off]) \n\t"
+        "vmovntps %%ymm10, 320(%[dst],%[off]) \n\t"
+        "vmovntps %%ymm11, 352(%[dst],%[off]) \n\t"
+        "vmovntps %%ymm12, 384(%[dst],%[off]) \n\t"
+        "vmovntps %%ymm13, 416(%[dst],%[off]) \n\t"
+        "vmovntps %%ymm14, 448(%[dst],%[off]) \n\t"
+        "vmovntps %%ymm15, 480(%[dst],%[off]) \n\t"
+
+        "add $512, %[off] \n\t"
+        "dec %[cnt] \n\t"
+        "jnz 1b \n\t"
+
+        /* order the non-temporal stores before any later store */
+        "sfence"
+        : [off] "+r" (offset), [cnt] "+r" (blocks)
+        : [src] "r" (src), [dst] "r" (dst)
+        : "memory", "cc",
+          "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+          "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
+    );
+}
+
+/**
+ * Copies size bytes from src to dst. The AVX block copy is used only when the
+ * CPU generation is above 3, size is a non-zero multiple of 4096 and both
+ * pointers are 32-byte aligned; every other case goes through memcpy().
+ * A size of 0 is excluded from the AVX path explicitly because 0 also
+ * satisfies the "multiple of 4096" test.
+ * @param[out] dst - destination memory region
+ * @param[in] src - source memory region
+ * @param[in] size - number of bytes to copy
+ */
+void pcilib_pagecpy(void *dst, void *src, size_t size) {
+    int aligned = ((uintptr_t)dst%32==0)&&((uintptr_t)src%32==0);
+    int whole_pages = (size > 0)&&(size%4096==0);
+
+    if (whole_pages && aligned && (pcilib_get_cpu_gen() > 3)) {
+        pcilib_memcpy4k_avx(dst, src, size);
+    } else {
+        memcpy(dst, src, size);
+    }
+}
diff --git a/pcilib/pagecpy.h b/pcilib/pagecpy.h
new file mode 100644
index 0000000..ef8636b
--- /dev/null
+++ b/pcilib/pagecpy.h
@@ -0,0 +1,29 @@
+#ifndef _PCILIB_PAGECPY_H
+#define _PCILIB_PAGECPY_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * This function should be used to move large blocks of non-cached memory between
+ * aligned memory locations. The function determines the CPU model and the alignment
+ * and calls the appropriate implementation. If nothing suitable is found, the standard
+ * memcpy is used. It is OK to call it on small or unaligned data; the standard memcpy
+ * is executed in this case. The memory regions should not intersect.
+ * Only an AVX implementation exists so far.
+ * @param[out] dst - destination memory region
+ * @param[in] src - source memory region
+ * @param[in] size - size of memory region in bytes.
+ * @return - nothing (the function is declared void and reports no errors)
+ */
+void pcilib_pagecpy(void *dst, void *src, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PCILIB_PAGECPY_H */
diff --git a/pcilib/pci.h b/pcilib/pci.h
index 8f05ddf..1b61f70 100644
--- a/pcilib/pci.h
+++ b/pcilib/pci.h
@@ -20,6 +20,8 @@
#include "linux-3.10.h"
#include "driver/pciDriver.h"
+#include "timing.h"
+#include "cpu.h"
#include "pcilib.h"
#include "register.h"
#include "kmem.h"
@@ -32,6 +34,7 @@
#include "xml.h"
#include "py.h"
#include "view.h"
+#include "memcpy.h"
typedef struct {
uint8_t max_link_speed, link_speed;
diff --git a/pcilib/pcilib.h b/pcilib/pcilib.h
index 272df8d..776a5c5 100644
--- a/pcilib/pcilib.h
+++ b/pcilib/pcilib.h
@@ -17,7 +17,7 @@ typedef uint16_t pcilib_view_t; /**< Type holding the register view position w
typedef uint16_t pcilib_unit_t; /**< Type holding the value unit position within unit listing in the model */
typedef uint32_t pcilib_register_addr_t; /**< Type holding the register address within address-space of BARs */
typedef uint8_t pcilib_register_size_t; /**< Type holding the size in bits of the register */
-typedef uint32_t pcilib_register_value_t; /**< Type holding the register value */
+typedef uint64_t pcilib_register_value_t; /**< Type holding the register value */
typedef uint8_t pcilib_dma_engine_addr_t;
typedef uint8_t pcilib_dma_engine_t;
typedef uint64_t pcilib_event_id_t;
diff --git a/pcilib/py.c b/pcilib/py.c
index 5ec122f..4256afc 100644
--- a/pcilib/py.c
+++ b/pcilib/py.c
@@ -142,7 +142,7 @@ static char *pcilib_py_parse_string(pcilib_t *ctx, const char *codestr, pcilib_v
} else {
err = pcilib_read_register(ctx, NULL, reg, &regval);
if (err) break;
- sprintf(dst + offset, "0x%x", regval);
+ sprintf(dst + offset, "0x%lx", regval);
}
}
diff --git a/pcilib/register.c b/pcilib/register.c
index a11bdac..12e3d28 100644
--- a/pcilib/register.c
+++ b/pcilib/register.c
@@ -16,6 +16,7 @@
#include "pci.h"
#include "bank.h"
+#include "datacpy.h"
#include "tools.h"
#include "error.h"
#include "property.h"
diff --git a/pcilib/timing.c b/pcilib/timing.c
new file mode 100644
index 0000000..0632b08
--- /dev/null
+++ b/pcilib/timing.c
@@ -0,0 +1,89 @@
+#define _POSIX_C_SOURCE 200112L
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <assert.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+
+#include "pci.h"
+#include "tools.h"
+#include "error.h"
+
+/**
+ * Advances the time stored in tv by the given timeout.
+ * @param[in,out] tv - time value to advance
+ * @param[in] timeout - amount to add, in microseconds (it is split by 1000000 into seconds and microseconds)
+ * @return always 0
+ */
+int pcilib_add_timeout(struct timeval *tv, pcilib_timeout_t timeout) {
+    tv->tv_usec += timeout%1000000;
+
+    if (tv->tv_usec <= 999999) {
+        tv->tv_sec += timeout/1000000;
+        return 0;
+    }
+
+    /* the microsecond field overflowed: carry one second */
+    tv->tv_usec -= 1000000;
+    tv->tv_sec += 1 + timeout/1000000;
+    return 0;
+}
+
+/**
+ * Stores "current time + timeout" in tv.
+ * @param[out] tv - receives the deadline
+ * @param[in] timeout - offset from the current time, passed to pcilib_add_timeout()
+ * @return always 0
+ */
+int pcilib_calc_deadline(struct timeval *tv, pcilib_timeout_t timeout) {
+    gettimeofday(tv, NULL);
+    (void)pcilib_add_timeout(tv, timeout);
+
+    return 0;
+}
+
+/**
+ * Tells whether less than `timeout` microseconds remain until the deadline.
+ * @param[in] tve - deadline; a zero tv_sec short-circuits to 0 (presumably "no deadline set" - confirm with callers)
+ * @param[in] timeout - minimal remaining time, in microseconds
+ * @return 1 if the deadline has passed or is closer than timeout, 0 otherwise
+ */
+int pcilib_check_deadline(struct timeval *tve, pcilib_timeout_t timeout) {
+    int64_t remaining;
+    struct timeval now;
+
+    if (tve->tv_sec == 0) return 0;
+
+    gettimeofday(&now, NULL);
+    remaining = ((tve->tv_sec - now.tv_sec)*1000000 + (tve->tv_usec - now.tv_usec));
+
+    /* The sign is tested first, before the comparison against the timeout value */
+    return ((remaining < 0)||(remaining < timeout)) ? 1 : 0;
+}
+
+/**
+ * Computes the time left until the deadline.
+ * @param[in] tve - deadline
+ * @return microseconds from the current time until tve, or 0 if the deadline has already passed
+ */
+pcilib_timeout_t pcilib_calc_time_to_deadline(struct timeval *tve) {
+    int64_t remaining;
+    struct timeval now;
+
+    gettimeofday(&now, NULL);
+    remaining = ((tve->tv_sec - now.tv_sec)*1000000 + (tve->tv_usec - now.tv_usec));
+
+    if (remaining >= 0) return remaining;
+    return 0;
+}
+
+/**
+ * Sleeps with a single nanosleep() call for the time remaining until the deadline.
+ * The nanosleep() result is not examined, so an interrupted sleep is not resumed.
+ * @param[in] tv - deadline
+ * @return always 0
+ */
+int pcilib_sleep_until_deadline(struct timeval *tv) {
+    pcilib_timeout_t remaining = pcilib_calc_time_to_deadline(tv);
+
+    if (remaining > 0) {
+        struct timespec pause;
+
+        /* remaining is in microseconds; timespec wants seconds and nanoseconds */
+        pause.tv_sec = remaining / 1000000;
+        pause.tv_nsec = 1000 * (remaining % 1000000);
+        nanosleep(&pause, NULL);
+    }
+
+    return 0;
+}
+
+/**
+ * Computes the difference tve - tvs in microseconds.
+ * @param[in] tvs - start time (subtracted)
+ * @param[in] tve - end time
+ * @return (tve - tvs) in microseconds, converted to pcilib_timeout_t
+ */
+pcilib_timeout_t pcilib_timediff(struct timeval *tvs, struct timeval *tve) {
+    time_t seconds = tve->tv_sec - tvs->tv_sec;
+    suseconds_t microseconds = tve->tv_usec - tvs->tv_usec;
+
+    return (seconds*1000000 + microseconds);
+}
+
+/**
+ * Three-way comparison of two time values: seconds first, then microseconds.
+ * @param[in] tv1 - first time value
+ * @param[in] tv2 - second time value
+ * @return 1 if tv1 is later than tv2, -1 if it is earlier, 0 if both are equal
+ */
+int pcilib_timecmp(struct timeval *tv1, struct timeval *tv2) {
+    if (tv1->tv_sec != tv2->tv_sec)
+        return (tv1->tv_sec > tv2->tv_sec) ? 1 : -1;
+
+    if (tv1->tv_usec != tv2->tv_usec)
+        return (tv1->tv_usec > tv2->tv_usec) ? 1 : -1;
+
+    return 0;
+}
diff --git a/pcilib/timing.h b/pcilib/timing.h
new file mode 100644
index 0000000..630df44
--- /dev/null
+++ b/pcilib/timing.h
@@ -0,0 +1,25 @@
+#ifndef _PCILIB_TIMING_H
+#define _PCILIB_TIMING_H
+
+#include <sys/time.h>
+#include <pcilib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Advances tv by timeout microseconds; returns 0 */
+int pcilib_add_timeout(struct timeval *tv, pcilib_timeout_t timeout);
+/** Stores the current time plus timeout in tv; returns 0 */
+int pcilib_calc_deadline(struct timeval *tv, pcilib_timeout_t timeout);
+/** Returns 1 if the deadline tve has passed or is closer than timeout microseconds, 0 otherwise (also 0 when tve->tv_sec is 0) */
+int pcilib_check_deadline(struct timeval *tve, pcilib_timeout_t timeout);
+/** Returns the number of microseconds left until tve, or 0 if it has already passed */
+pcilib_timeout_t pcilib_calc_time_to_deadline(struct timeval *tve);
+/** Sleeps for the time remaining until the deadline tv; returns 0 */
+int pcilib_sleep_until_deadline(struct timeval *tv);
+/** Returns 1, -1 or 0 when tv1 is later than, earlier than or equal to tv2 */
+int pcilib_timecmp(struct timeval *tv1, struct timeval *tv2);
+/** Returns (tve - tvs) in microseconds; the start time comes first, matching the definition in timing.c */
+pcilib_timeout_t pcilib_timediff(struct timeval *tvs, struct timeval *tve);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _PCILIB_TIMING_H */
diff --git a/pcilib/tools.c b/pcilib/tools.c
index 352127d..a7bb444 100644
--- a/pcilib/tools.c
+++ b/pcilib/tools.c
@@ -101,255 +101,6 @@ void pcilib_swap(void *dst, void *src, size_t size, size_t n) {
}
}
-void *pcilib_memcpy8(void * dst, void const * src, size_t len) {
- int i;
- for (i = 0; i < len; i++) ((char*)dst)[i] = ((char*)src)[i];
- return dst;
-}
-
-void *pcilib_memcpy32(void * dst, void const * src, size_t len) {
- uint32_t * plDst = (uint32_t *) dst;
- uint32_t const * plSrc = (uint32_t const *) src;
-
- while (len >= 4) {
-// *plDst = ntohl(*plSrc);
- *plDst = *plSrc;
- plSrc++;
- plDst++;
- len -= 4;
- }
-
- char * pcDst = (char *) plDst;
- char const * pcSrc = (char const *) plSrc;
-
- while (len--) {
- *pcDst++ = *pcSrc++;
- }
-
- return (dst);
-}
-
-
-void *pcilib_memcpy64(void * dst, void const * src, size_t len) {
- uint64_t * plDst = (uint64_t *) dst;
- uint64_t const * plSrc = (uint64_t const *) src;
-
- while (len >= 8) {
- *plDst++ = *plSrc++;
- len -= 8;
- }
-
- char * pcDst = (char *) plDst;
- char const * pcSrc = (char const *) plSrc;
-
- while (len--) {
- *pcDst++ = *pcSrc++;
- }
-
- return (dst);
-}
-
-/*
-void *memcpy128(void * dst, void const * src, size_t len) {
-
- long pos = - (len>>2);
- char * plDst = (char *) dst - 4 * pos;
- char const * plSrc = (char const *) src - 4 * pos;
-
- if (pos) {
- __asm__ __volatile__ (
- "1: \n\t"
- "mov (%0,%2,4), %%edi \n\t"
- "mov %%edi, (%1,%2,4) \n\t"
- "inc %2 \n\t"
- "jnz 1b \n\t"
- :
- : "r" (plSrc), "r" (plDst), "r" (pos)
- : "%edi"
- );
- }
-
-
-
- long pos = - ((len>>4)<<4);
- char * plDst = (char *) dst - pos;
- char const * plSrc = (char const *) src - pos;
-
- if (pos) {
- __asm__ __volatile__ (
- "1: \n\t"
-// "movdqa (%0,%2), %%xmm0 \n\t"
- "mov (%0,%2), %%esi \n\t"
- "movd %%esi, %%xmm0 \n\t"
- "mov 4(%0,%2), %%esi \n\t"
- "movd %%esi, %%xmm1 \n\t"
- "mov 8(%0,%2), %%esi \n\t"
- "movd %%esi, %%xmm2 \n\t"
- "mov 12(%0,%2), %%esi \n\t"
- "movd %%esi, %%xmm3 \n\t"
- "pslldq $4, %%xmm1 \n\t"
- "por %%xmm1, %%xmm0 \n\t"
- "pslldq $8, %%xmm2 \n\t"
- "por %%xmm2, %%xmm0 \n\t"
- "pslldq $12, %%xmm3 \n\t"
- "por %%xmm3, %%xmm0 \n\t"
-
- "movntdq %%xmm0, (%1,%2) \n\t"
- "add $16, %2 \n\t"
- "jnz 1b \n\t"
- :
- : "r" (plSrc), "r" (plDst), "r" (pos)
- : "%rsi"
- );
- }
-
-
-
- len &= 0x3;
-
- char * pcDst = (char *) plDst;
- char const * pcSrc = (char const *) plSrc;
-
- while (len--) {
- *pcDst++ = *pcSrc++;
- }
-
- return (dst);
-}
-*/
-
-void *pcilib_datacpy32(void * dst, void const * src, uint8_t size, size_t n, pcilib_endianess_t endianess) {
- uint32_t * plDst = (uint32_t *) dst;
- uint32_t const * plSrc = (uint32_t const *) src;
-
- int swap = 0;
-
- if (endianess)
- swap = (endianess == PCILIB_BIG_ENDIAN)?(ntohs(1)!=1):(ntohs(1)==1);
-
- assert(size == 4); // only 32 bit at the moment
-
- if (swap) {
- while (n > 0) {
- *plDst = ntohl(*plSrc);
- ++plSrc;
- ++plDst;
- --n;
- }
- } else {
- while (n > 0) {
- *plDst = *plSrc;
- ++plSrc;
- ++plDst;
- --n;
- }
- }
-
- return dst;
-}
-
-int pcilib_get_page_mask() {
- int pagesize,pagemask,temp;
-
- pagesize = sysconf(_SC_PAGESIZE);
-
- for( pagemask=0, temp = pagesize; temp != 1; ) {
- temp = (temp >> 1);
- pagemask = (pagemask << 1)+1;
- }
- return pagemask;
-}
-
-int pcilib_get_cpu_count() {
- int err;
-
- int cpu_count;
- cpu_set_t mask;
-
- err = sched_getaffinity(getpid(), sizeof(mask), &mask);
- if (err) return 1;
-
-#ifdef CPU_COUNT
- cpu_count = CPU_COUNT(&mask);
-#else
- for (cpu_count = 0; cpu_count < CPU_SETSIZE; cpu_count++) {
- if (!CPU_ISSET(cpu_count, &mask)) break;
- }
-#endif
-
- if (!cpu_count) cpu_count = PCILIB_DEFAULT_CPU_COUNT;
- return cpu_count;
-}
-
-
-int pcilib_add_timeout(struct timeval *tv, pcilib_timeout_t timeout) {
- tv->tv_usec += timeout%1000000;
- if (tv->tv_usec > 999999) {
- tv->tv_usec -= 1000000;
- tv->tv_sec += 1 + timeout/1000000;
- } else {
- tv->tv_sec += timeout/1000000;
- }
-
- return 0;
-}
-int pcilib_calc_deadline(struct timeval *tv, pcilib_timeout_t timeout) {
- gettimeofday(tv, NULL);
- pcilib_add_timeout(tv, timeout);
- return 0;
-}
-
-int pcilib_check_deadline(struct timeval *tve, pcilib_timeout_t timeout) {
- int64_t res;
- struct timeval tvs;
-
- if (!tve->tv_sec) return 0;
-
- gettimeofday(&tvs, NULL);
- res = ((tve->tv_sec - tvs.tv_sec)*1000000 + (tve->tv_usec - tvs.tv_usec));
- // Hm... Some problems comparing signed and unsigned. So, sign check first
- if ((res < 0)||(res < timeout)) {
- return 1;
- }
- return 0;
-}
-
-pcilib_timeout_t pcilib_calc_time_to_deadline(struct timeval *tve) {
- int64_t res;
- struct timeval tvs;
-
- gettimeofday(&tvs, NULL);
- res = ((tve->tv_sec - tvs.tv_sec)*1000000 + (tve->tv_usec - tvs.tv_usec));
-
- if (res < 0) return 0;
- return res;
-}
-
-int pcilib_sleep_until_deadline(struct timeval *tv) {
- struct timespec wait;
- pcilib_timeout_t duration;
-
- duration = pcilib_calc_time_to_deadline(tv);
- if (duration > 0) {
- wait.tv_sec = duration / 1000000;
- wait.tv_nsec = 1000 * (duration % 1000000);
- nanosleep(&wait, NULL);
- }
-
- return 0;
-}
-
-pcilib_timeout_t pcilib_timediff(struct timeval *tvs, struct timeval *tve) {
- return ((tve->tv_sec - tvs->tv_sec)*1000000 + (tve->tv_usec - tvs->tv_usec));
-}
-
-int pcilib_timecmp(struct timeval *tv1, struct timeval *tv2) {
- if (tv1->tv_sec > tv2->tv_sec) return 1;
- else if (tv1->tv_sec < tv2->tv_sec) return -1;
- else if (tv1->tv_usec > tv2->tv_usec) return 1;
- else if (tv1->tv_usec < tv2->tv_usec) return -1;
- return 0;
-}
diff --git a/pcilib/tools.h b/pcilib/tools.h
index 8e91b17..8c525e0 100644
--- a/pcilib/tools.h
+++ b/pcilib/tools.h
@@ -6,14 +6,10 @@
#include <pcilib.h>
-#define pcilib_memcpy pcilib_memcpy32
-#define pcilib_datacpy pcilib_datacpy32
-
#define BIT_MASK(bits) ((1ll << (bits)) - 1)
#define min2(a, b) (((a)<(b))?(a):(b))
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -28,23 +24,6 @@ uint32_t pcilib_swap32(uint32_t x);
uint64_t pcilib_swap64(uint64_t x);
void pcilib_swap(void *dst, void *src, size_t size, size_t n);
-void * pcilib_memcpy8(void * dst, void const * src, size_t len);
-void * pcilib_memcpy32(void * dst, void const * src, size_t len);
-void * pcilib_memcpy64(void * dst, void const * src, size_t len);
-void * pcilib_datacpy32(void * dst, void const * src, uint8_t size, size_t n, pcilib_endianess_t endianess);
-
-int pcilib_get_page_mask();
-int pcilib_get_cpu_count();
-
-
-int pcilib_add_timeout(struct timeval *tv, pcilib_timeout_t timeout);
-int pcilib_calc_deadline(struct timeval *tv, pcilib_timeout_t timeout);
-int pcilib_check_deadline(struct timeval *tve, pcilib_timeout_t timeout);
-pcilib_timeout_t pcilib_calc_time_to_deadline(struct timeval *tve);
-int pcilib_sleep_until_deadline(struct timeval *tv);
-int pcilib_timecmp(struct timeval *tv1, struct timeval *tv2);
-pcilib_timeout_t pcilib_timediff(struct timeval *tve, struct timeval *tvs);
-
#ifdef __cplusplus
}
#endif
diff --git a/pcitool/cli.c b/pcitool/cli.c
index 90f11d4..8943347 100644
--- a/pcitool/cli.c
+++ b/pcitool/cli.c
@@ -753,9 +753,9 @@ void ViewInfo(pcilib_t *handle, pcilib_register_t reg, size_t id) {
printf(" Value aliases :");
for (i = 0; vnames[i].name; i++) {
if (i) printf(",");
- printf(" %s = %u", vnames[i].name, vnames[i].value);
+ printf(" %s = %lu", vnames[i].name, vnames[i].value);
if (vnames[i].min != vnames[i].max)
- printf(" (%u - %u)", vnames[i].min, vnames[i].max);
+ printf(" (%lu - %lu)", vnames[i].min, vnames[i].max);
}
printf("\n");
} else if (v->api == &pcilib_transform_view_api) {
diff --git a/protocols/default.c b/protocols/default.c
index cbc53a8..6f3dccf 100644
--- a/protocols/default.c
+++ b/protocols/default.c
@@ -6,6 +6,7 @@
#include "model.h"
#include "error.h"
#include "bar.h"
+#include "datacpy.h"
#define default_datacpy(dst, src, access, bank) pcilib_datacpy(dst, src, access, 1, bank->raw_endianess)
diff --git a/protocols/software.c b/protocols/software.c
index 55ed647..3da8fde 100644
--- a/protocols/software.c
+++ b/protocols/software.c
@@ -4,11 +4,14 @@
#include <string.h>
#include <sys/file.h>
+
+#include "tools.h"
#include "model.h"
#include "error.h"
#include "kmem.h"
#include "pcilib.h"
#include "pci.h"
+#include "datacpy.h"
typedef struct pcilib_software_register_bank_context_s pcilib_software_register_bank_context_t;
@@ -103,23 +106,33 @@ pcilib_register_bank_context_t* pcilib_software_registers_open(pcilib_t *ctx, pc
}
int pcilib_software_registers_read(pcilib_t *ctx, pcilib_register_bank_context_t *bank_ctx, pcilib_register_addr_t addr, pcilib_register_value_t *value){
- if ((addr + sizeof(pcilib_register_value_t)) > bank_ctx->bank->size) {
- pcilib_error("Trying to access space outside of the define register bank (bank: %s, addr: 0x%lx)", bank_ctx->bank->name, addr);
- return PCILIB_ERROR_INVALID_ADDRESS;
- }
+ const pcilib_register_bank_description_t *b = bank_ctx->bank;
+ int access = b->access / 8;
+
+ pcilib_register_value_t val = 0;
+
+ if ((addr + sizeof(pcilib_register_value_t)) > bank_ctx->bank->size) {
+ pcilib_error("Trying to access space outside of the define register bank (bank: %s, addr: 0x%lx)", bank_ctx->bank->name, addr);
+ return PCILIB_ERROR_INVALID_ADDRESS;
+ }
+
+ pcilib_datacpy(&val, ((pcilib_software_register_bank_context_t*)bank_ctx)->addr + addr, access, 1, b->raw_endianess);
+ *value = val;
- // we consider this atomic operation and, therefore, do no locking
- *value = *(pcilib_register_value_t*)(((pcilib_software_register_bank_context_t*)bank_ctx)->addr + addr);
- return 0;
+ return 0;
}
int pcilib_software_registers_write(pcilib_t *ctx, pcilib_register_bank_context_t *bank_ctx, pcilib_register_addr_t addr, pcilib_register_value_t value) {
- if ((addr + sizeof(pcilib_register_value_t)) > bank_ctx->bank->size) {
- pcilib_error("Trying to access space outside of the define register bank (bank: %s, addr: 0x%lx)", bank_ctx->bank->name, addr);
- return PCILIB_ERROR_INVALID_ADDRESS;
- }
-
- // we consider this atomic operation and, therefore, do no locking
- *(pcilib_register_value_t*)(((pcilib_software_register_bank_context_t*)bank_ctx)->addr + addr) = value;
- return 0;
+ const pcilib_register_bank_description_t *b = bank_ctx->bank;
+ int access = b->access / 8;
+
+ if ((addr + sizeof(pcilib_register_value_t)) > bank_ctx->bank->size) {
+ pcilib_error("Trying to access space outside of the define register bank (bank: %s, addr: 0x%lx)", bank_ctx->bank->name, addr);
+ return PCILIB_ERROR_INVALID_ADDRESS;
+ }
+
+ // we consider this atomic operation and, therefore, do no locking
+ pcilib_datacpy(((pcilib_software_register_bank_context_t*)bank_ctx)->addr + addr, &value, access, 1, b->raw_endianess);
+
+ return 0;
}