diff --git a/vpi/lz4.c b/vpi/lz4.c
index d58be27a2..39f176faf 100644
--- a/vpi/lz4.c
+++ b/vpi/lz4.c
@@ -47,10 +47,12 @@
 **************************************/
 /* 32 or 64 bits ? */
 #if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
+  || defined(__64BIT__)  || defined(__mips64) \
   || defined(__powerpc64__) || defined(__powerpc64le__) \
   || defined(__ppc64__) || defined(__ppc64le__) \
   || defined(__PPC64__) || defined(__PPC64LE__) \
-  || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) )   /* Detects 64 bits mode */
+  || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) \
+  || defined(__s390x__) )   /* Detects 64 bits mode */
 #  define LZ4_ARCH64 1
 #else
 #  define LZ4_ARCH64 0
@@ -854,12 +856,12 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
     if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB */
     if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize;
 
-    memcpy(safeBuffer, previousDictEnd - dictSize, dictSize);
+    memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
 
     dict->dictionary = (const BYTE*)safeBuffer;
     dict->dictSize = (U32)dictSize;
 
-    return 1;
+    return dictSize;
 }
 
 
@@ -869,9 +871,9 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
 ****************************/
 /*
  * This generic decompression function cover all use cases.
- * It shall be instanciated several times, using different sets of directives
+ * It shall be instantiated several times, using different sets of directives
  * Note that it is essential this generic function is really inlined,
- * in order to remove useless branches during compilation optimisation.
+ * in order to remove useless branches during compilation optimization.
  */
 FORCE_INLINE int LZ4_decompress_generic(
                  const char* source,
@@ -899,8 +901,8 @@ FORCE_INLINE int LZ4_decompress_generic(
     const BYTE* const lowLimit = (const BYTE*)dest - dictSize;
 
     const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
-    const size_t dec32table[] = {4-0, 4-3, 4-2, 4-3, 4-0, 4-0, 4-0, 4-0};   /* note : static reduces speed for LZ4_decompress_safe() on GCC64 */
-    static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
+    const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};
+    const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
 
     const int safeDecode = (endOnInput==endOnInputSize);
     const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
@@ -971,24 +973,25 @@ FORCE_INLINE int LZ4_decompress_generic(
             } while (s==255);
             if ((safeDecode) && LZ4_32BITS && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error;   /* overflow detection */
         }
+        length += MINMATCH;
 
         /* check external dictionary */
         if ((dict==usingExtDict) && (ref < (BYTE* const)dest))
         {
-            if (unlikely(op+length+MINMATCH > oend-LASTLITERALS)) goto _output_error;
+            if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error;
 
-            if (length+MINMATCH <= (size_t)(dest-(char*)ref))
+            if (length <= (size_t)(dest-(char*)ref))
             {
                 ref = dictEnd - (dest-(char*)ref);
-                memcpy(op, ref, length+MINMATCH);
-                op += length+MINMATCH;
+                memcpy(op, ref, length);
+                op += length;
             }
             else
             {
                 size_t copySize = (size_t)(dest-(char*)ref);
                 memcpy(op, dictEnd - copySize, copySize);
                 op += copySize;
-                copySize = length+MINMATCH - copySize;
+                copySize = length - copySize;
                 if (copySize > (size_t)((char*)op-dest))   /* overlap */
                 {
                     BYTE* const endOfMatch = op + copySize;
@@ -1005,28 +1008,26 @@ FORCE_INLINE int LZ4_decompress_generic(
         }
 
         /* copy repeated sequence */
+        cpy = op + length;
         if (unlikely((op-ref)<(int)STEPSIZE))
         {
-            const size_t dec64 = dec64table[LZ4_32BITS ? 0 : op-ref];
+            const size_t dec64 = dec64table[op-ref];
             op[0] = ref[0];
             op[1] = ref[1];
             op[2] = ref[2];
             op[3] = ref[3];
             ref += dec32table[op-ref];
             A32(op+4) = A32(ref);
-            op += STEPSIZE; ref -= dec64;
-        } else { LZ4_COPYSTEP(op,ref); }
-        cpy = op + length - (STEPSIZE-4);
+            op += 8; ref -= dec64;
+        } else { LZ4_COPY8(op,ref); }
 
-        if (unlikely(cpy>oend-COPYLENGTH-(STEPSIZE-4)))
+        if (unlikely(cpy>oend-12))
         {
             if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last 5 bytes must be literals */
             if (op<oend-COPYLENGTH) LZ4_WILDCOPY(op, ref, (oend-COPYLENGTH));
             while(op<cpy) *op++=*ref++;
-            op=cpy;
-            continue;
         }
-        LZ4_WILDCOPY(op, ref, cpy);
+        else LZ4_WILDCOPY(op, ref, cpy);
         op=cpy;   /* correction */
     }
 
@@ -1153,14 +1154,31 @@ Advanced decoding functions :
     the dictionary must be explicitly provided within parameters
 */
 
+FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
+{
+    if (dictSize==0)
+        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, NULL, 64 KB);
+    if ((dictStart+dictSize == dest) && (dictSize >= (int)(64 KB - 1)))
+        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, NULL, 64 KB);
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, dictStart, dictSize);
+}
+
 int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
 {
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, dictStart, dictSize);
+    //return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, dictStart, dictSize);
+    return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
 }
 
 int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
 {
-    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict, dictStart, dictSize);
+    //return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict, dictStart, dictSize);
+    return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
+}
+
+/* debug function */
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, dictStart, dictSize);
 }
 
 
diff --git a/vpi/lz4.h b/vpi/lz4.h
index f8327f0cc..44ada149e 100644
--- a/vpi/lz4.h
+++ b/vpi/lz4.h
@@ -37,13 +37,18 @@
 extern "C" {
 #endif
 
+/*
+ * lz4.h provides raw compression format functions, for optimal performance and integration into programs.
+ * If you need to generate data using an inter-operable format (respecting the framing specification),
+ * please use lz4frame.h instead.
+*/
 
 /**************************************
    Version
 **************************************/
 #define LZ4_VERSION_MAJOR    1    /* for major interface/format changes  */
 #define LZ4_VERSION_MINOR    3    /* for minor interface/format changes  */
-#define LZ4_VERSION_RELEASE  0    /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
 #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
 int LZ4_versionNumber (void);
 
@@ -64,12 +69,12 @@ int LZ4_versionNumber (void);
    Simple Functions
 **************************************/
 
-int LZ4_compress        (const char* source, char* dest, int inputSize);
+int LZ4_compress        (const char* source, char* dest, int sourceSize);
 int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
 
 /*
 LZ4_compress() :
-    Compresses 'inputSize' bytes from 'source' into 'dest'.
+    Compresses 'sourceSize' bytes from 'source' into 'dest'.
     Destination buffer must be already allocated,
     and must be sized to handle worst cases situations (input data not compressible)
     Worst case size evaluation is provided by function LZ4_compressBound()
@@ -83,16 +88,9 @@ LZ4_decompress_safe() :
     return : the number of bytes decompressed into the destination buffer (necessarily <= maxDecompressedSize)
              If the destination buffer is not large enough, decoding will stop and output an error code (<0).
              If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function is protected against buffer overflow exploits :
-             it never writes outside of output buffer, and never reads outside of input buffer.
-             Therefore, it is protected against malicious data packets.
-*/
-
-
-/*
-Note :
-    Should you prefer to explicitly allocate compression-table memory using your own allocation method,
-    use the streaming functions provided below, simply reset the memory area between each call to LZ4_compress_continue()
+             This function is protected against buffer overflow exploits,
+             and never writes outside of output buffer, nor reads outside of input buffer.
+             It is also protected against malicious data packets.
 */
 
 
@@ -104,9 +102,9 @@ Note :
 
 /*
 LZ4_compressBound() :
-    Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible)
-    primarily useful for memory allocation of output buffer.
-    macro is also provided when result needs to be evaluated at compilation (such as stack memory allocation).
+    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+    This function is primarily useful for memory allocation purposes (output buffer size).
+    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
 
     isize  : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE
     return : maximum output size in a "worst case" scenario
@@ -117,16 +115,17 @@ int LZ4_compressBound(int isize);
 
 /*
 LZ4_compress_limitedOutput() :
-    Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+    Compress 'sourceSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
     If it cannot achieve it, compression will stop, and result of the function will be zero.
+    This saves time and memory on detecting non-compressible (or barely compressible) data.
     This function never writes outside of provided output buffer.
 
-    inputSize  : Max supported value is LZ4_MAX_INPUT_VALUE
+    sourceSize  : Max supported value is LZ4_MAX_INPUT_VALUE
     maxOutputSize : is the size of the destination buffer (which must be already allocated)
     return : the number of bytes written in buffer 'dest'
-             or 0 if the compression fails
+             or 0 if compression fails
 */
-int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
 
 
 /*
@@ -148,7 +147,7 @@ LZ4_decompress_fast() :
              Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
     note : This function fully respect memory boundaries for properly formed compressed data.
            It is a bit faster than LZ4_decompress_safe().
-           However, it does not provide any protection against intentionnally modified data stream (malicious input).
+           However, it does not provide any protection against intentionally modified data stream (malicious input).
            Use this function in trusted environment only (data to decode comes from a trusted source).
 */
 int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
@@ -184,10 +183,9 @@ typedef struct { unsigned int table[LZ4_STREAMSIZE_U32]; } LZ4_stream_t;
 
 /*
  * LZ4_resetStream
- * Use this function to init a newly allocated LZ4_stream_t structure
- * You can also reset an existing LZ4_stream_t structure
+ * Use this function to init an allocated LZ4_stream_t structure
  */
-void LZ4_resetStream (LZ4_stream_t* LZ4_stream);
+void LZ4_resetStream (LZ4_stream_t* LZ4_streamPtr);
 
 /*
  * If you prefer dynamic allocation methods,
@@ -223,10 +221,10 @@ int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* s
 /*
  * LZ4_saveDict
  * If previously compressed data block is not guaranteed to remain available at its memory location
- * save it into a safe place (char* safeBuffer)
+ * save it into a safer place (char* safeBuffer)
  * Note : you don't need to call LZ4_loadDict() afterwards,
  *        dictionary is immediately usable, you can therefore call again LZ4_compress_continue()
- * Return : 1 if OK, 0 if error
+ * Return : dictionary size in bytes, or 0 if error
  * Note : any dictSize > 64 KB will be interpreted as 64KB.
  */
 int LZ4_saveDict (LZ4_stream_t* LZ4_stream, char* safeBuffer, int dictSize);
@@ -268,9 +266,9 @@ int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
     These decoding functions allow decompression of multiple blocks in "streaming" mode.
     Previously decoded blocks must still be available at the memory position where they were decoded.
     If it's not possible, save the relevant part of decoded data into a safe buffer,
-    and indicate where its new address using LZ4_setDictDecode()
+    and indicate where its new address using LZ4_setStreamDecode()
 */
-int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize);
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
 
 
@@ -279,10 +277,9 @@ Advanced decoding functions :
 *_usingDict() :
     These decoding functions work the same as
     a combination of LZ4_setDictDecode() followed by LZ4_decompress_x_continue()
-    all together into a single function call.
-    It doesn't use nor update an LZ4_streamDecode_t structure.
+    They don't use nor update an LZ4_streamDecode_t structure.
 */
-int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize);
+int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
 int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);