From a5423ce5ce62969d3ff20f9dd21d888a59e5585a Mon Sep 17 00:00:00 2001 From: Cary R Date: Thu, 10 Apr 2014 14:33:12 -0700 Subject: [PATCH] Update FST and related files to match the latest from GTKWave --- vpi/Makefile.in | 2 +- vpi/cppcheck.sup | 51 +-- vpi/fastlz.c | 2 +- vpi/fstapi.c | 996 +++++++++++++++++++++++++++++++++++------------ vpi/fstapi.h | 14 +- vpi/lz4.c | 877 +++++++++++++++++++++++++++++++++++++++++ vpi/lz4.h | 252 ++++++++++++ 7 files changed, 1907 insertions(+), 287 deletions(-) create mode 100644 vpi/lz4.c create mode 100644 vpi/lz4.h diff --git a/vpi/Makefile.in b/vpi/Makefile.in index 278be1f43..a6b1c1416 100644 --- a/vpi/Makefile.in +++ b/vpi/Makefile.in @@ -64,7 +64,7 @@ ifeq (@HAVE_LIBBZ2@,yes) O += sys_lxt.o lxt_write.o endif O += sys_lxt2.o lxt2_write.o -O += sys_fst.o fstapi.o fastlz.o +O += sys_fst.o fstapi.o fastlz.o lz4.o endif # Object files for v2005_math.vpi diff --git a/vpi/cppcheck.sup b/vpi/cppcheck.sup index 276a3eeac..833f1b7dc 100644 --- a/vpi/cppcheck.sup +++ b/vpi/cppcheck.sup @@ -3,30 +3,33 @@ // problems will not be fixed. 
// fstapi.c from GTKWave -obsoleteFunctionsasctime:fstapi.c:740 -obsoleteFunctionsalloca:fstapi.c:1994 -unreadVariable:fstapi.c:1388 -variableScope:fstapi.c:842 -variableScope:fstapi.c:1798 -variableScope:fstapi.c:2222 -variableScope:fstapi.c:2414 -variableScope:fstapi.c:2573 -variableScope:fstapi.c:2574 -variableScope:fstapi.c:3193 -variableScope:fstapi.c:3218 -variableScope:fstapi.c:3341 -variableScope:fstapi.c:3777 -variableScope:fstapi.c:3786 -variableScope:fstapi.c:4297 -variableScope:fstapi.c:4300 -variableScope:fstapi.c:4867 -variableScope:fstapi.c:4921 -variableScope:fstapi.c:5225 -variableScope:fstapi.c:5228 -variableScope:fstapi.c:5464 -variableScope:fstapi.c:5569 -variableScope:fstapi.c:5570 -variableScope:fstapi.c:5601 +obsoleteFunctionsasctime:fstapi.c:840 +obsoleteFunctionsalloca:fstapi.c:2225 +unreadVariable:fstapi.c:1569 +variableScope:fstapi.c:581 +variableScope:fstapi.c:1301 +variableScope:fstapi.c:1900 +variableScope:fstapi.c:2029 +variableScope:fstapi.c:2454 +variableScope:fstapi.c:2646 +variableScope:fstapi.c:2805 +variableScope:fstapi.c:2806 +variableScope:fstapi.c:3521 +variableScope:fstapi.c:3692 +variableScope:fstapi.c:4135 +variableScope:fstapi.c:4144 +variableScope:fstapi.c:4676 +variableScope:fstapi.c:4679 +variableScope:fstapi.c:5177 +variableScope:fstapi.c:5178 +variableScope:fstapi.c:5334 +variableScope:fstapi.c:5392 +variableScope:fstapi.c:5705 +variableScope:fstapi.c:5708 +variableScope:fstapi.c:5944 +variableScope:fstapi.c:6049 +variableScope:fstapi.c:6050 +variableScope:fstapi.c:6081 // lxt2_write.c from GTKWave obsoleteFunctionsalloca:lxt2_write.c:1813 diff --git a/vpi/fastlz.c b/vpi/fastlz.c index 8e14ecfc0..aa2f8bfda 100644 --- a/vpi/fastlz.c +++ b/vpi/fastlz.c @@ -215,7 +215,7 @@ static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* if(ip[0] == ip[-1] && FASTLZ_READU16(ip-1)==FASTLZ_READU16(ip+1)) { distance = 1; - ip += 3; + /* ip += 3; */ /* scan-build, never used */ ref = anchor - 1 + 3; goto 
match; } diff --git a/vpi/fstapi.c b/vpi/fstapi.c index a2d00454b..7f1ee1cd7 100644 --- a/vpi/fstapi.c +++ b/vpi/fstapi.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2013 Tony Bybell. + * Copyright (c) 2009-2014 Tony Bybell. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,10 +20,29 @@ * DEALINGS IN THE SOFTWARE. */ +/* + * possible disables: + * + * FST_DYNAMIC_ALIAS_DISABLE : dynamic aliases are not processed + * FST_DYNAMIC_ALIAS2_DISABLE : new encoding for dynamic aliases is not generated + * FST_WRITEX_DISABLE : fast write I/O routines are disabled + * FST_DISABLE_DUFFS_DEVICE : only if indirect branches are incredibly bad on host arch + * + * possible enables: + * + * FST_DEBUG : not for production use, only enable for development + * FST_REMOVE_DUPLICATE_VC : glitch removal (has writer performance impact) + * HAVE_LIBPTHREAD -> FST_WRITER_PARALLEL : enables inclusion of parallel writer code + * FST_DO_MISALIGNED_OPS (defined automatically for x86 and some others) : CPU architecture can handle misaligned loads/stores + * _WAVE_HAVE_JUDY : use Judy arrays instead of Jenkins (undefine if LGPL is not acceptable) + * + */ + #include #include "fstapi.h" #include "fastlz.h" +#include "lz4.h" #ifndef HAVE_LIBPTHREAD #undef FST_WRITER_PARALLEL @@ -69,13 +88,22 @@ void JenkinsFree(void *base_i, uint32_t hashmask); void **JenkinsIns(void *base_i, const unsigned char *mem, uint32_t length, uint32_t hashmask); #endif -#undef FST_DEBUG +#ifndef FST_WRITEX_DISABLE +#define FST_WRITEX_MAX (64 * 1024) +#else +#define fstWritex(a,b,c) fstFwrite((b), (c), 1, fv) +#endif + + +/* these defines have a large impact on writer speed when a model has a */ +/* huge number of symbols. as a default, use 128MB and increment when */ +/* every 1M signals are defined. 
*/ #define FST_BREAK_SIZE (1UL << 27) #define FST_BREAK_ADD_SIZE (1UL << 22) #define FST_BREAK_SIZE_MAX (1UL << 31) -#define FST_ACTIVATE_HUGE_BREAK (2000000) -#define FST_ACTIVATE_HUGE_INC (2000000) +#define FST_ACTIVATE_HUGE_BREAK (1000000) +#define FST_ACTIVATE_HUGE_INC (1000000) #define FST_WRITER_STR "fstWriter" #define FST_ID_NAM_SIZ (512) @@ -86,6 +114,7 @@ void **JenkinsIns(void *base_i, const unsigned char *mem, uint32_t length, uint3 #define FST_HDR_FILETYPE_SIZE (1) #define FST_HDR_TIMEZERO_SIZE (8) #define FST_GZIO_LEN (32768) +#define FST_HDR_FOURPACK_DUO_SIZE (4*1024*1024) #if defined(__i386__) || defined(__x86_64__) || defined(_AIX) #define FST_DO_MISALIGNED_OPS @@ -93,6 +122,7 @@ void **JenkinsIns(void *base_i, const unsigned char *mem, uint32_t length, uint3 #if defined(__APPLE__) && defined(__MACH__) #define FST_MACOSX +#include #endif @@ -110,8 +140,14 @@ void **JenkinsIns(void *base_i, const unsigned char *mem, uint32_t length, uint3 #endif #endif - -/* the recoded "extra" values... */ +/* + * the recoded "extra" values... + * note that FST_RCV_Q is currently unused and is for future expansion. + * its intended use is as another level of escape such that any arbitrary + * value can be stored as the value: { time_delta, 8 bits, FST_RCV_Q }. + * this is currently not implemented so that the branchless decode is: + * uint32_t shcnt = 2 << (vli & 1); tdelta = vli >> shcnt; + */ #define FST_RCV_X (1 | (0<<1)) #define FST_RCV_Z (1 | (1<<1)) #define FST_RCV_H (1 | (2<<1)) @@ -119,6 +155,7 @@ void **JenkinsIns(void *base_i, const unsigned char *mem, uint32_t length, uint3 #define FST_RCV_W (1 | (4<<1)) #define FST_RCV_L (1 | (5<<1)) #define FST_RCV_D (1 | (6<<1)) +#define FST_RCV_Q (1 | (7<<1)) #define FST_RCV_STR "xzhuwl-?" 
/* 01234567 */ @@ -336,20 +373,27 @@ return(rc); static unsigned char *fstCopyVarint32ToLeft(unsigned char *pnt, uint32_t v) { -unsigned char buf[5]; -unsigned char *spnt = buf; -uint32_t nxt; +unsigned char *spnt; +uint32_t nxt = v; +int cnt = 1; +int i; -while((nxt = v>>7)) +while((nxt = nxt>>7)) /* determine len to avoid temp buffer copying to cut down on load-hit-store */ { - *(spnt++) = (v&0x7f) | 0x80; + cnt++; + } + +pnt -= cnt; +spnt = pnt; +cnt--; + +for(i=0;i>7; + *(spnt++) = ((unsigned char)v) | 0x80; v = nxt; } -*(spnt++) = (v&0x7f); - -do { - *(--pnt) = *(--spnt); - } while(spnt != buf); +*spnt = (unsigned char)v; return(pnt); } @@ -361,10 +405,10 @@ uint64_t nxt; while((nxt = v>>7)) { - *(pnt++) = (v&0x7f) | 0x80; + *(pnt++) = ((unsigned char)v) | 0x80; v = nxt; } -*(pnt++) = (v&0x7f); +*(pnt++) = (unsigned char)v; return(pnt); } @@ -492,10 +536,65 @@ int len; while((nxt = v>>7)) { - *(pnt++) = (v&0x7f) | 0x80; + *(pnt++) = ((unsigned char)v) | 0x80; v = nxt; } -*(pnt++) = (v&0x7f); +*(pnt++) = (unsigned char)v; + +len = pnt-buf; +fstFwrite(buf, len, 1, handle); +return(len); +} + + +/* signed integer read/write routines are currently unused */ +static int64_t fstGetSVarint64(unsigned char *mem, int *skiplen) +{ +unsigned char *mem_orig = mem; +int64_t rc = 0; +const int64_t one = 1; +const int siz = sizeof(int64_t) * 8; +int shift = 0; +unsigned char byt; + +do { + byt = *(mem++); + rc |= ((int64_t)(byt & 0x7f)) << shift; + shift += 7; + + } while(byt & 0x80); + +if((shift>= 7; + + if (((!v) && (!(byt & 0x40))) || ((v == -1) && (byt & 0x40))) + { + more = 0; + byt &= 0x7f; + } + + *(pnt++) = byt; + } while(more); len = pnt-buf; fstFwrite(buf, len, 1, handle); @@ -544,6 +643,7 @@ uint32_t maxvalpos; unsigned vc_emitted : 1; unsigned is_initial_time : 1; +unsigned fourpack : 1; unsigned fastpack : 1; int64_t timezero; @@ -638,10 +738,10 @@ pnt += 4; while((nxt = v>>7)) { - *(pnt++) = (v&0x7f) | 0x80; + *(pnt++) = ((unsigned char)v) | 0x80; v = nxt; } 
-*(pnt++) = (v&0x7f); +*(pnt++) = (unsigned char)v; memcpy(pnt, dbuf, siz); len = pnt-buf + siz; @@ -665,18 +765,18 @@ pnt += 4; while((nxt = v>>7)) { - *(pnt++) = (v&0x7f) | 0x80; + *(pnt++) = ((unsigned char)v) | 0x80; v = nxt; } -*(pnt++) = (v&0x7f); +*(pnt++) = (unsigned char)v; v = siz; while((nxt = v>>7)) { - *(pnt++) = (v&0x7f) | 0x80; + *(pnt++) = ((unsigned char)v) | 0x80; v = nxt; } -*(pnt++) = (v&0x7f); +*(pnt++) = (unsigned char)v; memcpy(pnt, dbuf, siz); @@ -827,14 +927,10 @@ xc->curval_mem = NULL; static void fstDetermineBreakSize(struct fstWriterContext *xc) { #if defined(__linux__) || defined(FST_MACOSX) +int was_set = 0; #ifdef __linux__ FILE *f = fopen("/proc/meminfo", "rb"); -#else -FILE *f = popen("system_profiler", "r"); -#endif - -int was_set = 0; if(f) { @@ -846,18 +942,10 @@ if(f) s = fgets(buf, 256, f); if(s && *s) { -#ifdef __linux__ if(!strncmp(s, "MemTotal:", 9)) { size_t v = atol(s+10); v *= 1024; /* convert to bytes */ -#else - if((s=strstr(s, "Memory:"))) - { - size_t v = atol(s+7); - v <<= 30; /* convert GB to bytes */ -#endif - v /= 8; /* chop down to 1/8 physical memory */ if(v > FST_BREAK_SIZE) { @@ -874,18 +962,45 @@ if(f) } } -#ifdef __linux__ fclose(f); -#else - pclose(f); -#endif } if(!was_set) -#endif { xc->fst_huge_break_size = FST_BREAK_SIZE; } +#else +int mib[2]; +int64_t v; +size_t length; + +mib[0] = CTL_HW; +mib[1] = HW_MEMSIZE; +length = sizeof(int64_t); +if(!sysctl(mib, 2, &v, &length, NULL, 0)) + { + v /= 8; + + if(v > FST_BREAK_SIZE) + { + if(v > FST_BREAK_SIZE_MAX) + { + v = FST_BREAK_SIZE_MAX; + } + + xc->fst_huge_break_size = v; + was_set = 1; + } + } + +if(!was_set) + { + xc->fst_huge_break_size = FST_BREAK_SIZE; + } +#endif +#else +xc->fst_huge_break_size = FST_BREAK_SIZE; +#endif xc->fst_break_size = xc->fst_orig_break_size = FST_BREAK_SIZE; xc->fst_break_add_size = xc->fst_orig_break_add_size = FST_BREAK_ADD_SIZE; @@ -969,8 +1084,8 @@ if(xc) int rc; destlen = xc->maxvalpos; - dmem = malloc(destlen); - rc = 
compress2(dmem, &destlen, xc->curval_mem, xc->maxvalpos, 9); + dmem = malloc(compressBound(destlen)); + rc = compress2(dmem, &destlen, xc->curval_mem, xc->maxvalpos, 4); /* was 9...which caused performance drag on traces with many signals */ fputc(FST_BL_SKIP, xc->handle); /* temporarily tag the section, use FST_BL_VCDATA on finalize */ xc->section_start = ftello(xc->handle); @@ -1054,7 +1169,7 @@ hashmask |= hashmask >> 16; #endif #endif -if((!xc)||(xc->vchg_siz <= 1)||(xc->already_in_flush)) return; +if((xc->vchg_siz <= 1)||(xc->already_in_flush)) return; xc->already_in_flush = 1; /* should really do this with a semaphore */ xc->section_header_only = 0; @@ -1064,7 +1179,7 @@ vchg_mem = xc->vchg_mem; f = xc->handle; fstWriterVarint(f, xc->maxhandle); /* emit current number of handles */ -fputc(xc->fastpack ? 'F' : 'Z', f); +fputc(xc->fourpack ? '4' : (xc->fastpack ? 'F' : 'Z'), f); fpos = 1; packmemlen = 1024; /* maintain a running "longest" allocation to */ @@ -1184,6 +1299,8 @@ for(i=0;imaxhandle;i++) if(is_binary) { unsigned char acc = 0; +#ifdef FST_DISABLE_DUFFS_DEVICE + /* old algorithm */ int shift = 7 - ((vm4ip[1]-1) & 7); for(idx=vm4ip[1]-1;idx>=0;idx--) { @@ -1196,6 +1313,24 @@ for(i=0;imaxhandle;i++) acc = 0; } } +#else + /* new algorithm */ + idx = ((vm4ip[1]+7) & ~7); + switch(vm4ip[1] & 7) + { + case 0: do { acc = (pnt[idx+7-8] & 1) << 0; + case 7: acc |= (pnt[idx+6-8] & 1) << 1; + case 6: acc |= (pnt[idx+5-8] & 1) << 2; + case 5: acc |= (pnt[idx+4-8] & 1) << 3; + case 4: acc |= (pnt[idx+3-8] & 1) << 4; + case 3: acc |= (pnt[idx+2-8] & 1) << 5; + case 2: acc |= (pnt[idx+1-8] & 1) << 6; + case 1: acc |= (pnt[idx+0-8] & 1) << 7; + *(--scratchpnt) = acc; + idx -= 8; + } while(idx); + } +#endif scratchpnt = fstCopyVarint32ToLeft(scratchpnt, (time_delta << 1)); } @@ -1226,7 +1361,7 @@ for(i=0;imaxhandle;i++) else { free(packmem); - dmem = packmem = malloc(packmemlen = wrlen); + dmem = packmem = malloc(compressBound(packmemlen = wrlen)); } rc = 
compress2(dmem, &destlen, scratchpnt, wrlen, 4); @@ -1273,6 +1408,7 @@ for(i=0;imaxhandle;i++) } else { + /* this is extremely conservative: fastlz needs +5% for worst case, lz4 needs siz+(siz/255)+16 */ if(((wrlen * 2) + 2) <= packmemlen) { dmem = packmem; @@ -1283,7 +1419,7 @@ for(i=0;imaxhandle;i++) dmem = packmem = malloc(packmemlen = (wrlen * 2) + 2); } - rc = fastlz_compress(scratchpnt, wrlen, dmem); + rc = (xc->fourpack) ? LZ4_compress((char *)scratchpnt, (char *)dmem, wrlen) : fastlz_compress(scratchpnt, wrlen, dmem); if(rc < destlen) { #ifndef FST_DYNAMIC_ALIAS_DISABLE @@ -1366,42 +1502,89 @@ free(scratchpad); scratchpad = NULL; indxpos = ftello(f); xc->secnum++; -for(i=0;imaxhandle;i++) +#ifndef FST_DYNAMIC_ALIAS2_DISABLE +if(1) { - vm4ip = &(xc->valpos_mem[4*i]); + uint32_t prev_alias = 0; - if(vm4ip[2]) + for(i=0;imaxhandle;i++) { - if(zerocnt) - { - fpos += fstWriterVarint(f, (zerocnt << 1)); - zerocnt = 0; - } + vm4ip = &(xc->valpos_mem[4*i]); - if(vm4ip[2] & 0x80000000) + if(vm4ip[2]) { - fpos += fstWriterVarint(f, 0); /* signal */ - fpos += fstWriterVarint(f, (-(int32_t)vm4ip[2])); + if(zerocnt) + { + fpos += fstWriterVarint(f, (zerocnt << 1)); + zerocnt = 0; + } + + if(vm4ip[2] & 0x80000000) + { + if(vm4ip[2] != prev_alias) + { + fpos += fstWriterSVarint(f, (((int64_t)((int32_t)(prev_alias = vm4ip[2]))) << 1) | 1); + } + else + { + fpos += fstWriterSVarint(f, (0 << 1) | 1); + } + } + else + { + fpos += fstWriterSVarint(f, ((vm4ip[2] - prevpos) << 1) | 1); + prevpos = vm4ip[2]; + } + vm4ip[2] = 0; + vm4ip[3] = 0; /* clear out tchn idx */ } else { - fpos += fstWriterVarint(f, ((vm4ip[2] - prevpos) << 1) | 1); - prevpos = vm4ip[2]; + zerocnt++; } - vm4ip[2] = 0; - vm4ip[3] = 0; /* clear out tchn idx */ - } - else - { - zerocnt++; } } + else +#endif + { + for(i=0;imaxhandle;i++) + { + vm4ip = &(xc->valpos_mem[4*i]); + + if(vm4ip[2]) + { + if(zerocnt) + { + fpos += fstWriterVarint(f, (zerocnt << 1)); + zerocnt = 0; + } + + if(vm4ip[2] & 0x80000000) + { 
+ fpos += fstWriterVarint(f, 0); /* signal, note that using a *signed* varint would be more efficient than this byte escape! */ + fpos += fstWriterVarint(f, (-(int32_t)vm4ip[2])); + } + else + { + fpos += fstWriterVarint(f, ((vm4ip[2] - prevpos) << 1) | 1); + prevpos = vm4ip[2]; + } + vm4ip[2] = 0; + vm4ip[3] = 0; /* clear out tchn idx */ + } + else + { + zerocnt++; + } + } + } + if(zerocnt) { /* fpos += */ fstWriterVarint(f, (zerocnt << 1)); /* scan-build */ } #ifdef FST_DEBUG -printf("value chains: %d\n", cnt); +fprintf(stderr, "value chains: %d\n", cnt); #endif xc->vchg_mem[0] = '!'; @@ -1419,7 +1602,7 @@ tmem = fstMmap(NULL, tlen, PROT_READ|PROT_WRITE, MAP_SHARED, fileno(xc->tchn_han if(tmem) { unsigned long destlen = tlen; - unsigned char *dmem = malloc(destlen); + unsigned char *dmem = malloc(compressBound(destlen)); int rc = compress2(dmem, &destlen, tmem, tlen, 9); if((rc == Z_OK) && (destlen < tlen)) @@ -1454,7 +1637,11 @@ fflush(xc->handle); fstWriterFseeko(xc, xc->handle, xc->section_start-1, SEEK_SET); /* write out FST_BL_VCDATA over FST_BL_SKIP */ #ifndef FST_DYNAMIC_ALIAS_DISABLE +#ifndef FST_DYNAMIC_ALIAS2_DISABLE +fputc(FST_BL_VCDATA_DYN_ALIAS2, xc->handle); +#else fputc(FST_BL_VCDATA_DYN_ALIAS, xc->handle); +#endif #else fputc(FST_BL_VCDATA, xc->handle); #endif @@ -1472,7 +1659,7 @@ if(xc->dump_size_limit) xc2->size_limit_locked = 1; xc2->is_initial_time = 1; /* to trick emit value and emit time change */ #ifdef FST_DEBUG - printf("<< dump file size limit reached, stopping dumping >>\n"); + fprintf(stderr, "<< dump file size limit reached, stopping dumping >>\n"); #endif } } @@ -1645,7 +1832,7 @@ if(xc && !xc->already_in_close && !xc->already_in_flush) if(tmem) { unsigned long destlen = tlen; - unsigned char *dmem = malloc(destlen); + unsigned char *dmem = malloc(compressBound(destlen)); int rc = compress2(dmem, &destlen, tmem, tlen, 9); if((rc != Z_OK) || (destlen > tlen)) @@ -1708,10 +1895,10 @@ if(xc && !xc->already_in_close && 
!xc->already_in_flush) if(xc->compress_hier) { - unsigned char *mem = malloc(FST_GZIO_LEN); off_t hl, eos; gzFile zhandle; int zfd; + int fourpack_duo = 0; #ifndef __MINGW32__ char *fnam = malloc(strlen(xc->filename) + 5 + 1); #endif @@ -1722,25 +1909,67 @@ if(xc && !xc->already_in_close && !xc->already_in_flush) fstWriterUint64(xc->handle, 0); /* section length */ fstWriterUint64(xc->handle, xc->hier_file_len); /* uncompressed length */ - fflush(xc->handle); - zfd = dup(fileno(xc->handle)); - zhandle = gzdopen(zfd, "wb4"); - if(zhandle) + if(!xc->fourpack) { - fstWriterFseeko(xc, xc->hier_handle, 0, SEEK_SET); - for(hl = 0; hl < xc->hier_file_len; hl += FST_GZIO_LEN) + unsigned char *mem = malloc(FST_GZIO_LEN); + zfd = dup(fileno(xc->handle)); + fflush(xc->handle); + zhandle = gzdopen(zfd, "wb4"); + if(zhandle) { - unsigned len = ((xc->hier_file_len - hl) > FST_GZIO_LEN) ? FST_GZIO_LEN : (xc->hier_file_len - hl); - fstFread(mem, len, 1, xc->hier_handle); - gzwrite(zhandle, mem, len); + fstWriterFseeko(xc, xc->hier_handle, 0, SEEK_SET); + for(hl = 0; hl < xc->hier_file_len; hl += FST_GZIO_LEN) + { + unsigned len = ((xc->hier_file_len - hl) > FST_GZIO_LEN) ? 
FST_GZIO_LEN : (xc->hier_file_len - hl); + fstFread(mem, len, 1, xc->hier_handle); + gzwrite(zhandle, mem, len); + } + gzclose(zhandle); } - gzclose(zhandle); + else + { + close(zfd); + } + free(mem); } else { - close(zfd); + int lz4_maxlen; + unsigned char *mem; + unsigned char *hmem; + int packed_len; + + fflush(xc->handle); + + lz4_maxlen = LZ4_compressBound(xc->hier_file_len); + mem = malloc(lz4_maxlen); + hmem = fstMmap(NULL, xc->hier_file_len, PROT_READ|PROT_WRITE, MAP_SHARED, fileno(xc->hier_handle), 0); + packed_len = LZ4_compress((char *)hmem, (char *)mem, xc->hier_file_len); + fstMunmap(hmem, xc->hier_file_len); + + fourpack_duo = (!xc->repack_on_close) && (xc->hier_file_len > FST_HDR_FOURPACK_DUO_SIZE); /* double pack when hierarchy is large */ + + if(fourpack_duo) /* double packing with LZ4 is faster than gzip */ + { + unsigned char *mem_duo; + int lz4_maxlen_duo; + int packed_len_duo; + + lz4_maxlen_duo = LZ4_compressBound(packed_len); + mem_duo = malloc(lz4_maxlen_duo); + packed_len_duo = LZ4_compress((char *)mem, (char *)mem_duo, packed_len); + + fstWriterVarint(xc->handle, packed_len); /* 1st round compressed length */ + fstFwrite(mem_duo, packed_len_duo, 1, xc->handle); + free(mem_duo); + } + else + { + fstFwrite(mem, packed_len, 1, xc->handle); + } + + free(mem); } - free(mem); fstWriterFseeko(xc, xc->handle, 0, SEEK_END); eos = ftello(xc->handle); @@ -1749,7 +1978,9 @@ if(xc && !xc->already_in_close && !xc->already_in_flush) fflush(xc->handle); fstWriterFseeko(xc, xc->handle, fixup_offs, SEEK_SET); - fputc(FST_BL_HIER, xc->handle); /* actual tag */ + fputc(xc->fourpack ? + ( fourpack_duo ? 
FST_BL_HIER_LZ4DUO : FST_BL_HIER_LZ4) : + FST_BL_HIER, xc->handle); /* actual tag now also == compression type */ fstWriterFseeko(xc, xc->handle, 0, SEEK_END); /* move file pointer to end for any section adds */ fflush(xc->handle); @@ -2129,12 +2360,13 @@ if(xc) } -void fstWriterSetPackType(void *ctx, int typ) +void fstWriterSetPackType(void *ctx, enum fstWriterPackType typ) { struct fstWriterContext *xc = (struct fstWriterContext *)ctx; if(xc) { - xc->fastpack = (typ != 0); + xc->fastpack = (typ != FST_WR_PT_ZLIB); + xc->fourpack = (typ == FST_WR_PT_LZ4); } } @@ -2720,11 +2952,14 @@ unsigned char *temp_signal_value_buf; /* malloced for len in longest_signal_valu signed char timescale; unsigned char filetype; +unsigned use_vcd_extensions : 1; unsigned double_endian_match : 1; unsigned native_doubles_for_cb : 1; unsigned contains_geom_section : 1; -unsigned contains_hier_section : 1; /* valid for hier_pos */ -unsigned limit_range_valid : 1; /* valid for limit_range_start, limit_range_end */ +unsigned contains_hier_section : 1; /* valid for hier_pos */ +unsigned contains_hier_section_lz4duo : 1; /* valid for hier_pos (contains_hier_section_lz4 always also set) */ +unsigned contains_hier_section_lz4 : 1; /* valid for hier_pos */ +unsigned limit_range_valid : 1; /* valid for limit_range_start, limit_range_end */ char version[FST_HDR_SIM_VERSION_SIZE + 1]; char date[FST_HDR_DATE_SIZE + 1]; @@ -2773,6 +3008,14 @@ char str_scope_nam[FST_ID_NAM_SIZ+1]; char str_scope_comp[FST_ID_NAM_SIZ+1]; unsigned fseek_failed : 1; + +/* self-buffered I/O for writes */ + +#ifndef FST_WRITEX_DISABLE +int writex_pos; +int writex_fd; +unsigned char writex_buf[FST_WRITEX_MAX]; +#endif }; @@ -2793,6 +3036,41 @@ return(rc); } +#ifndef FST_WRITEX_DISABLE +static void fstWritex(struct fstReaderContext *xc, void *v, int len) +{ +unsigned char *s = (unsigned char *)v; + +if(len) + { + if(len < FST_WRITEX_MAX) + { + if(xc->writex_pos + len >= FST_WRITEX_MAX) + { + fstWritex(xc, NULL, 0); + } + + 
memcpy(xc->writex_buf + xc->writex_pos, s, len); + xc->writex_pos += len; + } + else + { + fstWritex(xc, NULL, 0); + if (write(xc->writex_fd, s, len)) { }; + } + } + else + { + if(xc->writex_pos) + { + if(write(xc->writex_fd, xc->writex_buf, xc->writex_pos)) { }; + xc->writex_pos = 0; + } + } +} +#endif + + /* * scope -> flat name handling */ @@ -3174,6 +3452,17 @@ if(xc) } +void fstReaderSetVcdExtensions(void *ctx, int enable) +{ +struct fstReaderContext *xc = (struct fstReaderContext *)ctx; + +if(xc) + { + xc->use_vcd_extensions = (enable != 0); + } +} + + void fstReaderIterBlocksSetNativeDoublesOnCallback(void *ctx, int enable) { struct fstReaderContext *xc = (struct fstReaderContext *)ctx; @@ -3186,54 +3475,34 @@ if(xc) /* * hierarchy processing */ -static char *fstVcdID(int value) +static void fstVcdID(char *buf, unsigned int value) { -static char buf[16]; char *pnt = buf; -int vmod; /* zero is illegal for a value...it is assumed they start at one */ -for(;;) +while (value) { - if((vmod = (value % 94))) - { - *(pnt++) = (char)(vmod + 32); - } - else - { - *(pnt++) = '~'; value -= 94; - } + value--; + *(pnt++) = (char)('!' + value % 94); value = value / 94; - if(!value) { break; } } *pnt = 0; -return(buf); } -static char *fstVcdIDForFwrite(int value, int *len) +static int fstVcdIDForFwrite(char *buf, unsigned int value) { -static char buf[16]; char *pnt = buf; -int vmod; /* zero is illegal for a value...it is assumed they start at one */ -for(;;) +while (value) { - if((vmod = (value % 94))) - { - *(pnt++) = (char)(vmod + 32); - } - else - { - *(pnt++) = '~'; value -= 94; - } + value--; + *(pnt++) = (char)('!' 
+ value % 94); value = value / 94; - if(!value) { break; } } -*len = pnt-buf; -return(buf); +return(pnt - buf); } @@ -3247,21 +3516,50 @@ if(!xc->fh) char *fnam = malloc(strlen(xc->filename) + 6 + 16 + 32 + 1); unsigned char *mem = malloc(FST_GZIO_LEN); off_t hl, uclen; - gzFile zhandle; + off_t clen = 0; + gzFile zhandle = NULL; int zfd; + int htyp = FST_BL_SKIP; + + /* can't handle both set at once should never happen in a real file */ + if(!xc->contains_hier_section_lz4 && xc->contains_hier_section) + { + htyp = FST_BL_HIER; + } + else + if(xc->contains_hier_section_lz4 && !xc->contains_hier_section) + { + htyp = xc->contains_hier_section_lz4duo ? FST_BL_HIER_LZ4DUO : FST_BL_HIER_LZ4; + } sprintf(fnam, "%s.hier_%d_%p", xc->filename, getpid(), (void *)xc); fstReaderFseeko(xc, xc->f, xc->hier_pos, SEEK_SET); uclen = fstReaderUint64(xc->f); fflush(xc->f); - zfd = dup(fileno(xc->f)); - zhandle = gzdopen(zfd, "rb"); - if(!zhandle) + + if(htyp == FST_BL_HIER) { - close(zfd); - free(mem); - free(fnam); - return(0); + fstReaderFseeko(xc, xc->f, xc->hier_pos, SEEK_SET); + uclen = fstReaderUint64(xc->f); + fflush(xc->f); + + zfd = dup(fileno(xc->f)); + zhandle = gzdopen(zfd, "rb"); + if(!zhandle) + { + close(zfd); + free(mem); + free(fnam); + return(0); + } + } + else + if((htyp == FST_BL_HIER_LZ4) || (htyp == FST_BL_HIER_LZ4DUO)) + { + fstReaderFseeko(xc, xc->f, xc->hier_pos - 8, SEEK_SET); /* get section len */ + clen = fstReaderUint64(xc->f) - 16; + uclen = fstReaderUint64(xc->f); + fflush(xc->f); } #ifndef __MINGW32__ @@ -3282,26 +3580,79 @@ if(!xc->fh) if(fnam) unlink(fnam); #endif - for(hl = 0; hl < uclen; hl += FST_GZIO_LEN) + if(htyp == FST_BL_HIER) { - size_t len = ((uclen - hl) > FST_GZIO_LEN) ? FST_GZIO_LEN : (uclen - hl); - size_t gzreadlen = gzread(zhandle, mem, len); /* rc should equal len... */ - size_t fwlen; - - if(gzreadlen != len) + for(hl = 0; hl < uclen; hl += FST_GZIO_LEN) { - pass_status = 0; - break; + size_t len = ((uclen - hl) > FST_GZIO_LEN) ? 
FST_GZIO_LEN : (uclen - hl); + size_t gzreadlen = gzread(zhandle, mem, len); /* rc should equal len... */ + size_t fwlen; + + if(gzreadlen != len) + { + pass_status = 0; + break; + } + + fwlen = fstFwrite(mem, len, 1, xc->fh); + if(fwlen != 1) + { + pass_status = 0; + break; + } + } + gzclose(zhandle); + } + else + if(htyp == FST_BL_HIER_LZ4DUO) + { + unsigned char *lz4_cmem = malloc(clen); + unsigned char *lz4_ucmem = malloc(uclen); + unsigned char *lz4_ucmem2; + uint64_t uclen2; + int skiplen2 = 0; + + fstFread(lz4_cmem, clen, 1, xc->f); + + uclen2 = fstGetVarint64(lz4_cmem, &skiplen2); + lz4_ucmem2 = malloc(uclen2); + pass_status = (uclen2 == LZ4_decompress_safe_partial ((char *)lz4_cmem + skiplen2, (char *)lz4_ucmem2, clen - skiplen2, uclen2, uclen2)); + if(pass_status) + { + pass_status = (uclen == LZ4_decompress_safe_partial ((char *)lz4_ucmem2, (char *)lz4_ucmem, uclen2, uclen, uclen)); + + if(fstFwrite(lz4_ucmem, uclen, 1, xc->fh) != 1) + { + pass_status = 0; + } } - fwlen = fstFwrite(mem, len, 1, xc->fh); - if(fwlen != 1) + free(lz4_ucmem2); + free(lz4_ucmem); + free(lz4_cmem); + } + else + if(htyp == FST_BL_HIER_LZ4) + { + unsigned char *lz4_cmem = malloc(clen); + unsigned char *lz4_ucmem = malloc(uclen); + + fstFread(lz4_cmem, clen, 1, xc->f); + pass_status = (uclen == LZ4_decompress_safe_partial ((char *)lz4_cmem, (char *)lz4_ucmem, clen, uclen, uclen)); + + if(fstFwrite(lz4_ucmem, uclen, 1, xc->fh) != 1) { pass_status = 0; - break; } - } - gzclose(zhandle); + + free(lz4_ucmem); + free(lz4_cmem); + } + else /* FST_BL_SKIP */ + { + pass_status = 0; + } + free(mem); free(fnam); @@ -3503,6 +3854,7 @@ uint32_t len, alias; int num_signal_dyn = 65536; int attrtype, subtype; uint64_t attrarg; +fstHandle maxhandle_scanbuild; if(!xc) return(0); @@ -3529,9 +3881,9 @@ if(fv) switch(xc->timescale) { - case 2: time_scale = 100; time_dimension[0] = ' '; break; + case 2: time_scale = 100; time_dimension[0] = 0; break; case 1: time_scale = 10; - case 0: 
time_dimension[0] = ' '; break; + case 0: time_dimension[0] = 0; break; case -1: time_scale = 100; time_dimension[0] = 'm'; break; case -2: time_scale = 10; @@ -3566,7 +3918,7 @@ if(fv) if(fv) fprintf(fv, "$timescale\n\t%d%ss\n$end\n", time_scale, time_dimension); } -xc->maxhandle = 0; +xc->maxhandle = 0; xc->num_alias = 0; free(xc->signal_lens); @@ -3613,7 +3965,7 @@ while(!feof(xc->fh)) attrarg = fstReaderVarint64(xc->fh); - if(fv) + if(fv && xc->use_vcd_extensions) { switch(attrtype) { @@ -3652,7 +4004,7 @@ while(!feof(xc->fh)) break; case FST_ST_GEN_ATTREND: - if(fv) fprintf(fv, "$attrend $end\n"); + if(fv && xc->use_vcd_extensions) fprintf(fv, "$attrend $end\n"); break; case FST_VT_VCD_EVENT: @@ -3720,8 +4072,10 @@ while(!feof(xc->fh)) } if(fv) { + char vcdid_buf[16]; uint32_t modlen = (vartype != FST_VT_VCD_PORT) ? len : ((len - 2) / 3); - fprintf(fv, "$var %s %"PRIu32" %s %s $end\n", vartypes[vartype], modlen, fstVcdID(xc->maxhandle+1), str); + fstVcdID(vcdid_buf, xc->maxhandle+1); + fprintf(fv, "$var %s %"PRIu32" %s %s $end\n", vartypes[vartype], modlen, vcdid_buf, str); } xc->maxhandle++; } @@ -3734,8 +4088,10 @@ while(!feof(xc->fh)) } if(fv) { + char vcdid_buf[16]; uint32_t modlen = (vartype != FST_VT_VCD_PORT) ? len : ((len - 2) / 3); - fprintf(fv, "$var %s %"PRIu32" %s %s $end\n", vartypes[vartype], modlen, fstVcdID(alias), str); + fstVcdID(vcdid_buf, alias); + fprintf(fv, "$var %s %"PRIu32" %s %s $end\n", vartypes[vartype], modlen, vcdid_buf, str); } xc->num_alias++; } @@ -3748,11 +4104,13 @@ while(!feof(xc->fh)) } if(fv) fprintf(fv, "$enddefinitions $end\n"); -xc->signal_lens = realloc(xc->signal_lens, xc->maxhandle*sizeof(uint32_t)); -xc->signal_typs = realloc(xc->signal_typs, xc->maxhandle*sizeof(unsigned char)); +maxhandle_scanbuild = xc->maxhandle ? 
xc->maxhandle : 1; /*scan-build warning suppression, in reality we have at least one signal */ + +xc->signal_lens = realloc(xc->signal_lens, maxhandle_scanbuild*sizeof(uint32_t)); +xc->signal_typs = realloc(xc->signal_typs, maxhandle_scanbuild*sizeof(unsigned char)); free(xc->process_mask); -xc->process_mask = calloc(1, (xc->maxhandle+7)/8); +xc->process_mask = calloc(1, (maxhandle_scanbuild+7)/8); free(xc->temp_signal_value_buf); xc->temp_signal_value_buf = malloc(xc->longest_signal_value_len + 1); @@ -3937,7 +4295,7 @@ if(gzread_pass_status) xc->timezero = fstReaderUint64(xc->f); } } - else if((sectype == FST_BL_VCDATA) || (sectype == FST_BL_VCDATA_DYN_ALIAS)) + else if((sectype == FST_BL_VCDATA) || (sectype == FST_BL_VCDATA_DYN_ALIAS) || (sectype == FST_BL_VCDATA_DYN_ALIAS2)) { if(hdr_incomplete) { @@ -4029,6 +4387,17 @@ if(gzread_pass_status) xc->contains_hier_section = 1; xc->hier_pos = ftello(xc->f); } + else if(sectype == FST_BL_HIER_LZ4DUO) + { + xc->contains_hier_section_lz4 = 1; + xc->contains_hier_section_lz4duo = 1; + xc->hier_pos = ftello(xc->f); + } + else if(sectype == FST_BL_HIER_LZ4) + { + xc->contains_hier_section_lz4 = 1; + xc->hier_pos = ftello(xc->f); + } else if(sectype == FST_BL_BLACKOUT) { uint32_t i; @@ -4107,7 +4476,7 @@ if((!nam)||(!(xc->f=fopen(nam, "rb")))) xc->filename = strdup(nam); rc = fstReaderInit(xc); - if((rc) && (xc->vc_section_count) && (xc->maxhandle) && ((xc->fh)||(xc->contains_hier_section))) + if((rc) && (xc->vc_section_count) && (xc->maxhandle) && ((xc->fh)||(xc->contains_hier_section||(xc->contains_hier_section_lz4)))) { /* more init */ xc->do_rewind = 1; @@ -4226,6 +4595,8 @@ uint32_t traversal_mem_offs; uint32_t *scatterptr, *headptr, *length_remaining; uint32_t cur_blackout = 0; int packtype; +unsigned char *mc_mem = NULL; +uint32_t mc_mem_len; /* corresponds to largest value encountered in chain_table_lengths[i] */ if(!xc) return(0); @@ -4233,7 +4604,15 @@ scatterptr = calloc(xc->maxhandle, sizeof(uint32_t)); headptr 
= calloc(xc->maxhandle, sizeof(uint32_t)); length_remaining = calloc(xc->maxhandle, sizeof(uint32_t)); -if(fv) { fprintf(fv, "$dumpvars\n"); } +if(fv) + { + fprintf(fv, "$dumpvars\n"); +#ifndef FST_WRITEX_DISABLE + fflush(fv); + setvbuf(fv, (char *) NULL, _IONBF, 0); /* even buffered IO is slow so disable it and use our own routines that don't need seeking */ + xc->writex_fd = fileno(fv); +#endif + } for(;;) { @@ -4248,13 +4627,13 @@ for(;;) if((sectype == EOF) || (sectype == FST_BL_SKIP)) { #ifdef FST_DEBUG - printf("<< EOF >>\n"); + fprintf(stderr, "<< EOF >>\n"); #endif break; } blkpos++; - if((sectype != FST_BL_VCDATA) && (sectype != FST_BL_VCDATA_DYN_ALIAS)) + if((sectype != FST_BL_VCDATA) && (sectype != FST_BL_VCDATA_DYN_ALIAS) && (sectype != FST_BL_VCDATA_DYN_ALIAS2)) { blkpos += seclen; continue; @@ -4287,9 +4666,9 @@ for(;;) mem_required_for_traversal = fstReaderUint64(xc->f); mem_for_traversal = malloc(mem_required_for_traversal + 66); /* add in potential fastlz overhead */ #ifdef FST_DEBUG - printf("sec: %d seclen: %d begtim: %d endtim: %d\n", + fprintf(stderr, "sec: %d seclen: %d begtim: %d endtim: %d\n", secnum, (int)seclen, (int)beg_tim, (int)end_tim); - printf("\tmem_required_for_traversal: %d\n", (int)mem_required_for_traversal); + fprintf(stderr, "\tmem_required_for_traversal: %d\n", (int)mem_required_for_traversal); #endif /* process time block */ { @@ -4307,7 +4686,7 @@ for(;;) tsec_clen = fstReaderUint64(xc->f); tsec_nitems = fstReaderUint64(xc->f); #ifdef FST_DEBUG - printf("\ttime section unc: %d, com: %d (%d items)\n", + fprintf(stderr, "\ttime section unc: %d, com: %d (%d items)\n", (int)tsec_uclen, (int)tsec_clen, (int)tsec_nitems); #endif if(tsec_clen > seclen) break; /* corrupted tsec_clen: by definition it can't be larger than size of section */ @@ -4350,7 +4729,7 @@ for(;;) tpnt += skiplen; } - tc_head = calloc(tsec_nitems, sizeof(uint32_t)); + tc_head = calloc(tsec_nitems /* scan-build */ ? 
tsec_nitems : 1, sizeof(uint32_t)); free(ucdata); } @@ -4369,12 +4748,20 @@ for(;;) if(fv) { - if(beg_tim) { fprintf(fv, "#%"PRIu64"\n", beg_tim); } + char wx_buf[32]; + int wx_len; + + if(beg_tim) + { + wx_len = sprintf(wx_buf, "#%"PRIu64"\n", beg_tim); + fstWritex(xc, wx_buf, wx_len); + } if((xc->num_blackouts)&&(cur_blackout != xc->num_blackouts)) { if(beg_tim == xc->blackout_times[cur_blackout]) { - fprintf(fv, "$dump%s $end\n", (xc->blackout_activity[cur_blackout++]) ? "on" : "off"); + wx_len = sprintf(wx_buf, "$dump%s $end\n", (xc->blackout_activity[cur_blackout++]) ? "on" : "off"); + fstWritex(xc, wx_buf, wx_len); } } } @@ -4424,11 +4811,12 @@ for(;;) { if(fv) { - int vcdid_len; - const char *vcd_id = fstVcdIDForFwrite(idx+1, &vcdid_len); - fputc(val, fv); - fstFwrite(vcd_id, vcdid_len, 1, fv); - fputc('\n', fv); + char vcd_id[16]; + + int vcdid_len = fstVcdIDForFwrite(vcd_id+1, idx+1); + vcd_id[0] = val; /* collapse 3 writes into one I/O call */ + vcd_id[vcdid_len + 1] = '\n'; + fstWritex(xc, vcd_id, vcdid_len + 2); } } } @@ -4451,13 +4839,16 @@ for(;;) { if(fv) { - int vcdid_len; - const char *vcd_id = fstVcdIDForFwrite(idx+1, &vcdid_len); - fputc((xc->signal_typs[idx] != FST_VT_VCD_PORT) ? 'b' : 'p', fv); - fstFwrite(mu+sig_offs, xc->signal_lens[idx], 1, fv); - fputc(' ', fv); - fstFwrite(vcd_id, vcdid_len, 1, fv); - fputc('\n', fv); + char vcd_id[16]; + int vcdid_len = fstVcdIDForFwrite(vcd_id+1, idx+1); + + vcd_id[0] = (xc->signal_typs[idx] != FST_VT_VCD_PORT) ? 
'b' : 'p'; + fstWritex(xc, vcd_id, 1); + fstWritex(xc,mu+sig_offs, xc->signal_lens[idx]); + + vcd_id[0] = ' '; /* collapse 3 writes into one I/O call */ + vcd_id[vcdid_len + 1] = '\n'; + fstWritex(xc, vcd_id, vcdid_len + 2); } } } @@ -4511,6 +4902,10 @@ for(;;) { if(fv) { + char vcdid_buf[16]; + char wx_buf[64]; + int wx_len; + clone_d = (unsigned char *)&d; if(xc->double_endian_match) { @@ -4526,7 +4921,9 @@ for(;;) } } - fprintf(fv, "r%.16g %s\n", d, fstVcdID(idx+1)); + fstVcdID(vcdid_buf, idx+1); + wx_len = sprintf(wx_buf, "r%.16g %s\n", d, vcdid_buf); + fstWritex(xc, wx_buf, wx_len); } } } @@ -4548,9 +4945,9 @@ for(;;) packtype = fgetc(xc->f); #ifdef FST_DEBUG - printf("\tframe_uclen: %d, frame_clen: %d, frame_maxhandle: %d\n", + fprintf(stderr, "\tframe_uclen: %d, frame_clen: %d, frame_maxhandle: %d\n", (int)frame_uclen, (int)frame_clen, (int)frame_maxhandle); - printf("\tvc_maxhandle: %d, packtype: %c\n", (int)vc_maxhandle, packtype); + fprintf(stderr, "\tvc_maxhandle: %d, packtype: %c\n", (int)vc_maxhandle, packtype); #endif indx_pntr = blkpos + seclen - 24 -tsec_clen -8; @@ -4558,7 +4955,7 @@ for(;;) chain_clen = fstReaderUint64(xc->f); indx_pos = indx_pntr - chain_clen; #ifdef FST_DEBUG - printf("\tindx_pos: %d (%d bytes)\n", (int)indx_pos, (int)chain_clen); + fprintf(stderr, "\tindx_pos: %d (%d bytes)\n", (int)indx_pos, (int)chain_clen); #endif chain_cmem = malloc(chain_clen); if(!chain_cmem) goto block_err; @@ -4581,37 +4978,83 @@ for(;;) idx = 0; pval = 0; - do + if(sectype == FST_BL_VCDATA_DYN_ALIAS2) { - int skiplen; - uint64_t val = fstGetVarint32(pnt, &skiplen); - - if(!val) - { - pnt += skiplen; - val = fstGetVarint32(pnt, &skiplen); - chain_table[idx] = 0; /* need to explicitly zero as calloc above might not run */ - chain_table_lengths[idx] = -val; /* because during this loop iter would give stale data! 
*/ - idx++; - } - else - if(val&1) - { - pval = chain_table[idx] = pval + (val >> 1); - if(idx) { chain_table_lengths[pidx] = pval - chain_table[pidx]; } - pidx = idx++; - } - else - { - int loopcnt = val >> 1; - for(i=0;i> 1; + if(shval > 0) + { + pval = chain_table[idx] = pval + shval; + if(idx) { chain_table_lengths[pidx] = pval - chain_table[pidx]; } + pidx = idx++; + } + else if(shval < 0) + { + chain_table[idx] = 0; /* need to explicitly zero as calloc above might not run */ + chain_table_lengths[idx] = prev_alias = shval; /* because during this loop iter would give stale data! */ + idx++; + } + else + { + chain_table[idx] = 0; /* need to explicitly zero as calloc above might not run */ + chain_table_lengths[idx] = prev_alias; /* because during this loop iter would give stale data! */ + idx++; + } } - } + else + { + uint64_t val = fstGetVarint32(pnt, &skiplen); + + int loopcnt = val >> 1; + for(i=0;i> 1); + if(idx) { chain_table_lengths[pidx] = pval - chain_table[pidx]; } + pidx = idx++; + } + else + { + int loopcnt = val >> 1; + for(i=0;i xc->maxhandle) idx = xc->maxhandle; for(i=0;if, &skiplen); if(val) { - unsigned char *mu = mem_for_traversal + traversal_mem_offs; - unsigned char *mc = malloc(chain_table_lengths[i]); + unsigned char *mu = mem_for_traversal + traversal_mem_offs; /* uncomp: dst */ + unsigned char *mc; /* comp: src */ unsigned long destlen = val; unsigned long sourcelen = chain_table_lengths[i]; - + + if(mc_mem_len < chain_table_lengths[i]) + { + free(mc_mem); + mc_mem = malloc(mc_mem_len = chain_table_lengths[i]); + } + mc = mc_mem; + fstFread(mc, chain_table_lengths[i], 1, xc->f); - if(packtype == 'F') + + switch(packtype) { - rc = fastlz_decompress(mc, sourcelen, mu, destlen); + case '4': rc = (destlen == LZ4_decompress_safe_partial((char *)mc, (char *)mu, sourcelen, destlen, destlen)) ? 
Z_OK : Z_DATA_ERROR; + break; + case 'F': fastlz_decompress(mc, sourcelen, mu, destlen); /* rc appears unreliable */ + break; + default: rc = uncompress(mu, &destlen, mc, sourcelen); + break; } - else - { - rc = uncompress(mu, &destlen, mc, sourcelen); - } - free(mc); + /* data to process is for(j=0;jlimit_range_valid) @@ -4726,12 +5187,15 @@ for(;;) } } - fprintf(fv, "#%"PRIu64"\n", time_table[i]); + wx_len = sprintf(wx_buf, "#%"PRIu64"\n", time_table[i]); + fstWritex(xc, wx_buf, wx_len); + if((xc->num_blackouts)&&(cur_blackout != xc->num_blackouts)) { if(time_table[i] == xc->blackout_times[cur_blackout]) { - fprintf(fv, "$dump%s $end\n", (xc->blackout_activity[cur_blackout++]) ? "on" : "off"); + wx_len = sprintf(wx_buf, "$dump%s $end\n", (xc->blackout_activity[cur_blackout++]) ? "on" : "off"); + fstWritex(xc, wx_buf, wx_len); } } previous_time = time_table[i]; @@ -4769,11 +5233,12 @@ for(;;) { if(fv) { - int vcdid_len; - const char *vcd_id = fstVcdIDForFwrite(idx+1, &vcdid_len); - fputc(val, fv); - fstFwrite(vcd_id, vcdid_len, 1, fv); - fputc('\n', fv); + char vcd_id[16]; + int vcdid_len = fstVcdIDForFwrite(vcd_id+1, idx+1); + + vcd_id[0] = val; + vcd_id[vcdid_len+1] = '\n'; + fstWritex(xc, vcd_id, vcdid_len+2); } } headptr[idx] += skiplen; @@ -4814,21 +5279,23 @@ for(;;) { if(fv) { + char vcd_id[16]; int vcdid_len; - const char *vcd_id = fstVcdIDForFwrite(idx+1, &vcdid_len); - - fputc('s', fv); + + vcd_id[0] = 's'; + fstWritex(xc, vcd_id, 1); + + vcdid_len = fstVcdIDForFwrite(vcd_id+1, idx+1); { unsigned char *vesc = malloc(len*4 + 1); int vlen = fstUtilityBinToEsc(vesc, vdata, len); - - vesc[vlen] = 0; - fstFwrite(vesc, vlen, 1, fv); + fstWritex(xc, vesc, vlen); free(vesc); } - fputc(' ', fv); - fstFwrite(vcd_id, vcdid_len, 1, fv); - fputc('\n', fv); + + vcd_id[0] = ' '; + vcd_id[vcdid_len + 1] = '\n'; + fstWritex(xc, vcd_id, vcdid_len+2); } } } @@ -4884,8 +5351,10 @@ for(;;) else { if(fv) { - fputc((xc->signal_typs[idx] != FST_VT_VCD_PORT) ? 
'b' : 'p', fv); - fstFwrite(xc->temp_signal_value_buf, len, 1, fv); + unsigned char ch_bp = (xc->signal_typs[idx] != FST_VT_VCD_PORT) ? 'b' : 'p'; + + fstWritex(xc, &ch_bp, 1); + fstWritex(xc, xc->temp_signal_value_buf, len); } } @@ -4903,8 +5372,10 @@ for(;;) { if(fv) { - fputc((xc->signal_typs[idx] != FST_VT_VCD_PORT) ? 'b' : 'p', fv); - fstFwrite(vdata, len, 1, fv); + unsigned char ch_bp = (xc->signal_typs[idx] != FST_VT_VCD_PORT) ? 'b' : 'p'; + + fstWritex(xc, &ch_bp, 1); + fstWritex(xc, vdata, len); } } } @@ -4981,6 +5452,9 @@ for(;;) { if(fv) { + char wx_buf[32]; + int wx_len; + clone_d = (unsigned char *)&d; if(xc->double_endian_match) { @@ -4996,18 +5470,19 @@ for(;;) } } - fprintf(fv, "r%.16g", d); + wx_len = sprintf(wx_buf, "r%.16g", d); + fstWritex(xc, wx_buf, wx_len); } } } if(fv) { - int vcdid_len; - const char *vcd_id = fstVcdIDForFwrite(idx+1, &vcdid_len); - fputc(' ', fv); - fstFwrite(vcd_id, vcdid_len, 1, fv); - fputc('\n', fv); + char vcd_id[16]; + int vcdid_len = fstVcdIDForFwrite(vcd_id+1, idx+1); + vcd_id[0] = ' '; + vcd_id[vcdid_len+1] = '\n'; + fstWritex(xc, vcd_id, vcdid_len+2); } skiplen += len; @@ -5032,25 +5507,30 @@ for(;;) block_err: free(tc_head); free(chain_cmem); - free(mem_for_traversal); + free(mem_for_traversal); mem_for_traversal = NULL; secnum++; if(secnum == xc->vc_section_count) break; /* in case file is growing, keep with original block count */ blkpos += seclen; } +if(mem_for_traversal) free(mem_for_traversal); /* scan-build */ free(length_remaining); free(headptr); free(scatterptr); -if(chain_table) - { - free(chain_table); - free(chain_table_lengths); - } +if(chain_table) free(chain_table); +if(chain_table_lengths) free(chain_table_lengths); free(time_table); +#ifndef FST_WRITEX_DISABLE +if(fv) + { + fstWritex(xc, NULL, 0); + } +#endif + return(1); } @@ -5167,7 +5647,7 @@ for(;;) } blkpos++; - if((sectype != FST_BL_VCDATA) && (sectype != FST_BL_VCDATA_DYN_ALIAS)) + if((sectype != FST_BL_VCDATA) && (sectype != 
FST_BL_VCDATA_DYN_ALIAS) && (sectype != FST_BL_VCDATA_DYN_ALIAS2)) { blkpos += seclen; continue; @@ -5189,7 +5669,7 @@ for(;;) beg_tim2 = fstReaderUint64(xc->f); end_tim2 = fstReaderUint64(xc->f); - if(((sectype != FST_BL_VCDATA)&&(sectype != FST_BL_VCDATA_DYN_ALIAS)) || (!seclen) || (beg_tim2 != tim)) + if(((sectype != FST_BL_VCDATA)&&(sectype != FST_BL_VCDATA_DYN_ALIAS)&&(sectype != FST_BL_VCDATA_DYN_ALIAS2)) || (!seclen) || (beg_tim2 != tim)) { blkpos = prev_blkpos; break; @@ -5214,9 +5694,9 @@ mem_required_for_traversal = fstReaderUint64(xc->f); #ifdef FST_DEBUG -printf("rvat sec: %d seclen: %d begtim: %d endtim: %d\n", +fprintf(stderr, "rvat sec: %d seclen: %d begtim: %d endtim: %d\n", secnum, (int)seclen, (int)beg_tim, (int)end_tim); -printf("\tmem_required_for_traversal: %d\n", (int)mem_required_for_traversal); +fprintf(stderr, "\tmem_required_for_traversal: %d\n", (int)mem_required_for_traversal); #endif /* process time block */ @@ -5235,7 +5715,7 @@ tsec_uclen = fstReaderUint64(xc->f); tsec_clen = fstReaderUint64(xc->f); tsec_nitems = fstReaderUint64(xc->f); #ifdef FST_DEBUG -printf("\ttime section unc: %d, com: %d (%d items)\n", +fprintf(stderr, "\ttime section unc: %d, com: %d (%d items)\n", (int)tsec_uclen, (int)tsec_clen, (int)tsec_nitems); #endif ucdata = malloc(tsec_uclen); @@ -5310,9 +5790,9 @@ xc->rvat_vc_maxhandle = fstReaderVarint64(xc->f); xc->rvat_vc_start = ftello(xc->f); /* points to '!' 
character */ #ifdef FST_DEBUG -printf("\tframe_uclen: %d, frame_clen: %d, frame_maxhandle: %d\n", +fprintf(stderr, "\tframe_uclen: %d, frame_clen: %d, frame_maxhandle: %d\n", (int)frame_uclen, (int)frame_clen, (int)xc->rvat_frame_maxhandle); -printf("\tvc_maxhandle: %d\n", (int)xc->rvat_vc_maxhandle); +fprintf(stderr, "\tvc_maxhandle: %d\n", (int)xc->rvat_vc_maxhandle); #endif indx_pntr = blkpos + seclen - 24 -tsec_clen -8; @@ -5320,7 +5800,7 @@ fstReaderFseeko(xc, xc->f, indx_pntr, SEEK_SET); chain_clen = fstReaderUint64(xc->f); indx_pos = indx_pntr - chain_clen; #ifdef FST_DEBUG -printf("\tindx_pos: %d (%d bytes)\n", (int)indx_pos, (int)chain_clen); +fprintf(stderr, "\tindx_pos: %d (%d bytes)\n", (int)indx_pos, (int)chain_clen); #endif chain_cmem = malloc(chain_clen); fstReaderFseeko(xc, xc->f, indx_pos, SEEK_SET); @@ -5384,7 +5864,7 @@ for(i=0;irvat_data_valid = 1; @@ -5647,7 +6127,6 @@ if(xc->signal_lens[facidx] == 1) /**********************************************************************/ -#ifndef FST_DYNAMIC_ALIAS_DISABLE #ifndef _WAVE_HAVE_JUDY /***********************/ @@ -5850,7 +6329,6 @@ if(base && *base) } } -#endif #endif /**********************************************************************/ diff --git a/vpi/fstapi.h b/vpi/fstapi.h index 9ace034ac..d45a3894e 100644 --- a/vpi/fstapi.h +++ b/vpi/fstapi.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2013 Tony Bybell. + * Copyright (c) 2009-2014 Tony Bybell. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -40,6 +40,12 @@ extern "C" { typedef uint32_t fstHandle; +enum fstWriterPackType { + FST_WR_PT_ZLIB = 0, + FST_WR_PT_FASTLZ = 1, + FST_WR_PT_LZ4 = 2 +}; + enum fstFileType { FST_FT_MIN = 0, @@ -57,6 +63,9 @@ enum fstBlockType { FST_BL_GEOM = 3, FST_BL_HIER = 4, FST_BL_VCDATA_DYN_ALIAS = 5, + FST_BL_HIER_LZ4 = 6, + FST_BL_HIER_LZ4DUO = 7, + FST_BL_VCDATA_DYN_ALIAS2 = 8, FST_BL_ZWRAPPER = 254, /* indicates that whole trace is gz wrapped */ FST_BL_SKIP = 255 /* used while block is being written */ @@ -338,7 +347,7 @@ void fstWriterSetDate(void *ctx, const char *dat); void fstWriterSetDumpSizeLimit(void *ctx, uint64_t numbytes); void fstWriterSetEnvVar(void *ctx, const char *envvar); void fstWriterSetFileType(void *ctx, enum fstFileType filetype); -void fstWriterSetPackType(void *ctx, int typ); /* type = 0 (libz), 1 (fastlz) */ +void fstWriterSetPackType(void *ctx, enum fstWriterPackType typ); void fstWriterSetParallelMode(void *ctx, int enable); void fstWriterSetRepackOnClose(void *ctx, int enable); /* type = 0 (none), 1 (libz) */ void fstWriterSetScope(void *ctx, enum fstScopeType scopetype, @@ -401,6 +410,7 @@ void fstReaderSetFacProcessMask(void *ctx, fstHandle facidx); void fstReaderSetFacProcessMaskAll(void *ctx); void fstReaderSetLimitTimeRange(void *ctx, uint64_t start_time, uint64_t end_time); void fstReaderSetUnlimitedTimeRange(void *ctx); +void fstReaderSetVcdExtensions(void *ctx, int enable); /* diff --git a/vpi/lz4.c b/vpi/lz4.c new file mode 100644 index 000000000..ee37895f4 --- /dev/null +++ b/vpi/lz4.c @@ -0,0 +1,877 @@ +/* + LZ4 - Fast LZ compression algorithm + Copyright (C) 2011-2014, Yann Collet. 
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/************************************** + Tuning parameters +**************************************/ +/* + * MEMORY_USAGE : + * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 
+ * Increasing memory usage improves compression ratio + * Reduced memory usage can improve speed, due to cache effect + * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache + */ +#define MEMORY_USAGE 14 + +/* + * HEAPMODE : + * Select how default compression functions will allocate memory for their hash table, + * in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)). + */ +#define HEAPMODE 0 + + +/************************************** + CPU Feature Detection +**************************************/ +/* 32 or 64 bits ? */ +#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \ + || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \ + || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \ + || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) /* Detects 64 bits mode */ +# define LZ4_ARCH64 1 +#else +# define LZ4_ARCH64 0 +#endif + +/* + * Little Endian or Big Endian ? + * Overwrite the #define below if you know your architecture endianess + */ +#if defined (__GLIBC__) +# include +# if (__BYTE_ORDER == __BIG_ENDIAN) +# define LZ4_BIG_ENDIAN 1 +# endif +#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) +# define LZ4_BIG_ENDIAN 1 +#elif defined(__sparc) || defined(__sparc__) \ + || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ + || defined(__hpux) || defined(__hppa) \ + || defined(_MIPSEB) || defined(__s390__) +# define LZ4_BIG_ENDIAN 1 +#else +/* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */ +#endif + +/* + * Unaligned memory access is automatically enabled for "common" CPU, such as x86. 
+ * For others CPU, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to ensure aligned access property + * If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance + */ +#if defined(__ARM_FEATURE_UNALIGNED) +# define LZ4_FORCE_UNALIGNED_ACCESS 1 +#endif + +/* Define this parameter if your target system or compiler does not support hardware bit count */ +#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ +# define LZ4_FORCE_SW_BITCOUNT +#endif + +/* + * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : + * This option may provide a small boost to performance for some big endian cpu, although probably modest. + * You may set this option to 1 if data will remain within closed environment. + * This option is useless on Little_Endian CPU (such as x86) + */ + +/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ + + +/************************************** + Compiler Options +**************************************/ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +/* "restrict" is a known keyword */ +#else +# define restrict /* Disable restrict */ +#endif + +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# if LZ4_ARCH64 /* 64-bits */ +# pragma intrinsic(_BitScanForward64) /* For Visual 2005 */ +# pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */ +# else /* 32-bits */ +# pragma intrinsic(_BitScanForward) /* For Visual 2005 */ +# pragma intrinsic(_BitScanReverse) /* For Visual 2005 */ +# endif +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + +#ifdef _MSC_VER /* Visual Studio */ +# define lz4_bswap16(x) _byteswap_ushort(x) +#else +# 
define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) +#endif + +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#define likely(expr) expect((expr) != 0, 1) +#define unlikely(expr) expect((expr) != 0, 0) + + +/************************************** + Memory routines +**************************************/ +#include /* malloc, calloc, free */ +#define ALLOCATOR(n,s) calloc(n,s) +#define FREEMEM free +#include /* memset, memcpy */ +#define MEM_INIT memset + + +/************************************** + Includes +**************************************/ +#include "lz4.h" + + +/************************************** + Basic Types +**************************************/ +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + +#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) +# define _PACKED __attribute__ ((packed)) +#else +# define _PACKED +#endif + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif +#endif + +typedef struct { U16 v; } _PACKED U16_S; +typedef struct { U32 v; } _PACKED U32_S; +typedef struct { U64 v; } _PACKED U64_S; +typedef struct {size_t v;} _PACKED size_t_S; + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# if defined(__SUNPRO_C) || defined(__SUNPRO_CC) +# pragma pack(0) +# else +# pragma pack(pop) +# endif +#endif + +#define A16(x) 
(((U16_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) +#define AARCH(x) (((size_t_S *)(x))->v) + + +/************************************** + Constants +**************************************/ +#define LZ4_HASHLOG (MEMORY_USAGE-2) +#define HASHTABLESIZE (1 << MEMORY_USAGE) +#define HASHNBCELLS4 (1 << LZ4_HASHLOG) + +#define MINMATCH 4 + +#define COPYLENGTH 8 +#define LASTLITERALS 5 +#define MFLIMIT (COPYLENGTH+MINMATCH) +static const int LZ4_minLength = (MFLIMIT+1); + +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + +#define LZ4_64KLIMIT ((64 KB) + (MFLIMIT-1)) +#define SKIPSTRENGTH 6 /* Increasing this value will make the compression run slower on incompressible data */ + +#define MAXD_LOG 16 +#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) + +#define ML_BITS 4 +#define ML_MASK ((1U<=e; */ +#else +# define LZ4_WILDCOPY(d,s,e) { if (likely(e-d <= 8)) LZ4_COPY8(d,s) else do { LZ4_COPY8(d,s) } while (d>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clzll(val) >> 3); +# else + int r; + if (!(val>>32)) { r=4; } else { r=0; val>>=32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif +# else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctzll(val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif +# endif +} + +#else + +FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) +{ +# if defined(LZ4_BIG_ENDIAN) +# if defined(_MSC_VER) 
&& !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanReverse( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clz(val) >> 3); +# else + int r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif +# else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctz(val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif +# endif +} + +#endif + + +/**************************** + Compression functions +****************************/ +FORCE_INLINE int LZ4_hashSequence(U32 sequence, tableType_t tableType) +{ + if (tableType == byU16) + return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} + +FORCE_INLINE int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); } + +FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + switch (tableType) + { + case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; } + } +} + +FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} + 
+FORCE_INLINE const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; } + if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; } + { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ +} + +FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); +} + + +FORCE_INLINE int LZ4_compress_generic( + void* ctx, + const char* source, + char* dest, + int inputSize, + int maxOutputSize, + + limitedOutput_directive limitedOutput, + tableType_t tableType, + prefix64k_directive prefix) +{ + const BYTE* ip = (const BYTE*) source; + const BYTE* const base = (prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->base : (const BYTE*) source; + const BYTE* const lowLimit = ((prefix==withPrefix) ? 
((LZ4_Data_Structure*)ctx)->bufferStart : (const BYTE*)source); + const BYTE* anchor = (const BYTE*) source; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = iend - LASTLITERALS; + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; + + int length; + const int skipStrength = SKIPSTRENGTH; + U32 forwardH; + + /* Init conditions */ + if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ + if ((prefix==withPrefix) && (ip != ((LZ4_Data_Structure*)ctx)->nextBlock)) return 0; /* must continue from end of previous block */ + if (prefix==withPrefix) ((LZ4_Data_Structure*)ctx)->nextBlock=iend; /* do it now, due to potential early exit */ + if ((tableType == byU16) && (inputSize>=(int)LZ4_64KLIMIT)) return 0; /* Size too large (not within 64K limit) */ + if (inputSize> skipStrength; + ip = forwardIp; + forwardIp = ip + step; + + if (unlikely(forwardIp > mflimit)) { goto _last_literals; } + + forwardH = LZ4_hashPosition(forwardIp, tableType); + ref = LZ4_getPositionOnHash(h, ctx, tableType, base); + LZ4_putPositionOnHash(ip, h, ctx, tableType, base); + + } while ((ref + MAX_DISTANCE < ip) || (A32(ref) != A32(ip))); + + /* Catch up */ + while ((ip>anchor) && (ref > lowLimit) && (unlikely(ip[-1]==ref[-1]))) { ip--; ref--; } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + if ((limitedOutput) && (unlikely(op + length + (2 + 1 + LASTLITERALS) + (length/255) > oend))) return 0; /* Check output limit */ + if (length>=(int)RUN_MASK) + { + int len = length-RUN_MASK; + *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(length<>8) > oend))) return 0; /* Check output limit */ + if (length>=(int)ML_MASK) + { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } + if (length >= 255) { length-=255; *op++ = 255; } + 
*op++ = (BYTE)length; + } + else *token += (BYTE)(length); + + /* Test end of chunk */ + if (ip > mflimit) { anchor = ip; break; } + + /* Fill table */ + LZ4_putPosition(ip-2, ctx, tableType, base); + + /* Test next position */ + ref = LZ4_getPosition(ip, ctx, tableType, base); + LZ4_putPosition(ip, ctx, tableType, base); + if ((ref + MAX_DISTANCE >= ip) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; } + + /* Prepare next loop */ + anchor = ip++; + forwardH = LZ4_hashPosition(ip, tableType); + } + +_last_literals: + /* Encode Last Literals */ + { + int lastRun = (int)(iend - anchor); + if ((limitedOutput) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (BYTE)(lastRun<hashTable, 0, sizeof(lz4ds->hashTable)); + lz4ds->bufferStart = base; + lz4ds->base = base; + lz4ds->nextBlock = base; +} + +int LZ4_resetStreamState(void* state, const char* inputBuffer) +{ + if ((((size_t)state) & 3) != 0) return 1; /* Error : pointer is not aligned on 4-bytes boundary */ + LZ4_init((LZ4_Data_Structure*)state, (const BYTE*)inputBuffer); + return 0; +} + +void* LZ4_create (const char* inputBuffer) +{ + void* lz4ds = ALLOCATOR(1, sizeof(LZ4_Data_Structure)); + LZ4_init ((LZ4_Data_Structure*)lz4ds, (const BYTE*)inputBuffer); + return lz4ds; +} + + +int LZ4_free (void* LZ4_Data) +{ + FREEMEM(LZ4_Data); + return (0); +} + + +char* LZ4_slideInputBuffer (void* LZ4_Data) +{ + LZ4_Data_Structure* lz4ds = (LZ4_Data_Structure*)LZ4_Data; + size_t delta = lz4ds->nextBlock - (lz4ds->bufferStart + 64 KB); + + if ( (lz4ds->base - delta > lz4ds->base) /* underflow control */ + || ((size_t)(lz4ds->nextBlock - lz4ds->base) > 0xE0000000) ) /* close to 32-bits limit */ + { + size_t deltaLimit = (lz4ds->nextBlock - 64 KB) - lz4ds->base; + int nH; + + for (nH=0; nH < HASHNBCELLS4; 
nH++) + { + if ((size_t)(lz4ds->hashTable[nH]) < deltaLimit) lz4ds->hashTable[nH] = 0; + else lz4ds->hashTable[nH] -= (U32)deltaLimit; + } + memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); + lz4ds->base = lz4ds->bufferStart; + lz4ds->nextBlock = lz4ds->base + 64 KB; + } + else + { + memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); + lz4ds->nextBlock -= delta; + lz4ds->base -= delta; + } + + return (char*)(lz4ds->nextBlock); +} + + +int LZ4_compress_continue (void* LZ4_Data, const char* source, char* dest, int inputSize) +{ + return LZ4_compress_generic(LZ4_Data, source, dest, inputSize, 0, notLimited, byU32, withPrefix); +} + + +int LZ4_compress_limitedOutput_continue (void* LZ4_Data, const char* source, char* dest, int inputSize, int maxOutputSize) +{ + return LZ4_compress_generic(LZ4_Data, source, dest, inputSize, maxOutputSize, limited, byU32, withPrefix); +} + + +/**************************** + Decompression functions +****************************/ + +/* + * This generic decompression function cover all use cases. + * It shall be instanciated several times, using different sets of directives + * Note that it is essential this generic function is really inlined, + * in order to remove useless branches during compilation optimisation. + */ +FORCE_INLINE int LZ4_decompress_generic( + const char* source, + char* dest, + int inputSize, + int outputSize, /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. 
*/ + + int endOnInput, /* endOnOutputSize, endOnInputSize */ + int prefix64k, /* noPrefix, withPrefix */ + int partialDecoding, /* full, partial */ + int targetOutputSize /* only used if partialDecoding==partial */ + ) +{ + /* Local Variables */ + const BYTE* restrict ip = (const BYTE*) source; + const BYTE* ref; + const BYTE* const iend = ip + inputSize; + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + outputSize; + BYTE* cpy; + BYTE* oexit = op + targetOutputSize; + + /*const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; / static reduces speed for LZ4_decompress_safe() on GCC64 */ + const size_t dec32table[] = {4-0, 4-3, 4-2, 4-3, 4-0, 4-0, 4-0, 4-0}; /* static reduces speed for LZ4_decompress_safe() on GCC64 */ + static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; + + + /* Special cases */ + if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ + if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 
0 : -1; /* Empty output buffer */ + if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); + + + /* Main Loop */ + while (1) + { + unsigned token; + size_t length; + + /* get runlength */ + token = *ip++; + if ((length=(token>>ML_BITS)) == RUN_MASK) + { + unsigned s=255; + while (((endOnInput)?ip(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-COPYLENGTH))) + { + if (partialDecoding) + { + if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */ + if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */ + } + else + { + if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */ + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */ + } + memcpy(op, ip, length); + ip += length; + op += length; + break; /* Necessarily EOF, due to parsing restrictions */ + } + LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; + if ((prefix64k==noPrefix) && (unlikely(ref < (BYTE* const)dest))) goto _output_error; /* Error : offset outside destination buffer */ + + /* get matchlength */ + if ((length=(token&ML_MASK)) == ML_MASK) + { + while ((!endOnInput) || (ipoend-COPYLENGTH-(STEPSIZE-4))) + { + if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last 5 bytes must be literals */ + LZ4_SECURECOPY(op, ref, (oend-COPYLENGTH)); + while(op (unsigned int)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) +static inline int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } + +/* +LZ4_compressBound() : + Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible) + primarily useful for memory allocation of output buffer. 
+ inline function is recommended for the general case, + macro is also provided when result needs to be evaluated at compilation (such as stack memory allocation). + + isize : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE + return : maximum output size in a "worst case" scenario + or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE) +*/ + + +int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); + +/* +LZ4_compress_limitedOutput() : + Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. + If it cannot achieve it, compression will stop, and result of the function will be zero. + This function never writes outside of provided output buffer. + + inputSize : Max supported value is LZ4_MAX_INPUT_SIZE + maxOutputSize : is the size of the destination buffer (which must be already allocated) + return : the number of bytes written in buffer 'dest' + or 0 if the compression fails +*/ + + +int LZ4_decompress_fast (const char* source, char* dest, int outputSize); + +/* +LZ4_decompress_fast() : + outputSize : is the original (uncompressed) size + return : the number of bytes read from the source buffer (in other words, the compressed size) + If the source stream is malformed, the function will stop decoding and return a negative result. + note : This function is a bit faster than LZ4_decompress_safe() + This function never writes outside of output buffers, but may read beyond input buffer in case of malicious data packet. + Use this function preferably in a trusted environment (data to decode comes from a trusted source). + Destination buffer must be already allocated. Its size must be a minimum of 'outputSize' bytes.
+*/ + +int LZ4_decompress_safe_partial (const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize); + +/* +LZ4_decompress_safe_partial() : + This function decompresses a compressed block of size 'inputSize' at position 'source' + into output buffer 'dest' of size 'maxOutputSize'. + The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached, + reducing decompression time. + return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) + Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller. + Always check how many bytes were decoded. + If the source stream is detected malformed, the function will stop decoding and return a negative result. + This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets. +*/ + + +int LZ4_sizeofState(); +int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); +int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); + +/* +These functions are provided should you prefer to allocate memory for compression tables with your own allocation methods. +To know how much memory must be allocated for the compression tables, use : +int LZ4_sizeofState(); + +Note that tables must be aligned on 4-bytes boundaries, otherwise compression will fail (return code 0). + +The allocated memory can be provided to the compression functions using 'void* state' parameter. +LZ4_compress_withState() and LZ4_compress_limitedOutput_withState() are equivalent to previously described functions. +They just use the externally allocated memory area instead of allocating their own (on stack, or on heap).
+*/ + + +/************************************** + Streaming Functions +**************************************/ +void* LZ4_create (const char* inputBuffer); +int LZ4_compress_continue (void* LZ4_Data, const char* source, char* dest, int inputSize); +int LZ4_compress_limitedOutput_continue (void* LZ4_Data, const char* source, char* dest, int inputSize, int maxOutputSize); +char* LZ4_slideInputBuffer (void* LZ4_Data); +int LZ4_free (void* LZ4_Data); + +/* +These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks. +In order to achieve this, it is necessary to start creating the LZ4 Data Structure, thanks to the function : + +void* LZ4_create (const char* inputBuffer); +The result of the function is the (void*) pointer to the LZ4 Data Structure. +This pointer will be needed in all other functions. +If the pointer returned is NULL, then the allocation has failed, and compression must be aborted. +The only parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. +The input buffer must be already allocated, and size at least 192KB. +'inputBuffer' will also be the 'const char* source' of the first block. + +All blocks are expected to lie next to each other within the input buffer, starting from 'inputBuffer'. +To compress each block, use either LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(). +Their behavior is identical to LZ4_compress() or LZ4_compress_limitedOutput(), +but they require the LZ4 Data Structure as their first argument, and check that each block starts right after the previous one. +If next block does not begin immediately after the previous one, the compression will fail (return 0). + +When it's no longer possible to lay the next block after the previous one (not enough space left in the input buffer), a call to : +char* LZ4_slideInputBuffer(void* LZ4_Data); +must be performed.
It will typically copy the latest 64KB of input at the beginning of input buffer. +Note that, for this function to work properly, minimum size of an input buffer must be 192KB. +==> The memory position where the next input data block must start is provided as the result of the function. + +Compression can then resume, using LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(), as usual. + +When compression is completed, a call to LZ4_free() will release the memory used by the LZ4 Data Structure. +*/ + +int LZ4_sizeofStreamState(); +int LZ4_resetStreamState(void* state, const char* inputBuffer); + +/* +These functions achieve the same result as : +void* LZ4_create (const char* inputBuffer); + +They are provided here to allow the user program to allocate memory using its own routines. + +To know how much space must be allocated, use LZ4_sizeofStreamState(); +Note also that space must be 4-bytes aligned. + +Once space is allocated, you must initialize it using : LZ4_resetStreamState(void* state, const char* inputBuffer); +void* state is a pointer to the space allocated. +It must be aligned on 4-bytes boundaries, and be large enough. +The parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. +The input buffer must be already allocated, and size at least 192KB. +'inputBuffer' will also be the 'const char* source' of the first block. + +The same space can be re-used multiple times, just by initializing it each time with LZ4_resetStreamState(). +A return value of 0 from LZ4_resetStreamState() means OK. +Any other value means there was an error (typically, pointer is not aligned on 4-bytes boundaries).
+*/ + + +int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int inputSize, int maxOutputSize); +int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int outputSize); + +/* +*_withPrefix64k() : + These decoding functions work the same as their "normal name" versions, + but can use up to 64KB of data in front of 'char* dest'. + These functions are necessary to decode inter-dependent blocks. +*/ + + +/************************************** + Obsolete Functions +**************************************/ +/* +These functions are deprecated and should no longer be used. +They are provided here for compatibility with existing user programs. LZ4_uncompress() simply forwards to LZ4_decompress_fast(), and LZ4_uncompress_unknownOutputSize() simply forwards to LZ4_decompress_safe(). +*/ +static inline int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); } +static inline int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } + + + +#if defined (__cplusplus) +} +#endif +