Commit 55afd6e7 by Jonathan Mannancheril

Update zstd to 1.4.4

parent 94f00eb6
@@ -534,7 +534,7 @@ Files extracted from upstream source:
 ## zstd
 - Upstream: https://github.com/facebook/zstd
-- Version: 1.4.3
+- Version: 1.4.4
 - License: BSD-3-Clause
 Files extracted from upstream source:
...
@@ -164,7 +164,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
     _BitScanReverse ( &r, val );
     return (unsigned) r;
 # elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 # elif defined(__ICCARM__)    /* IAR Intrinsic */
     return 31 - __CLZ(val);
 # else   /* Software version */
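Why `__builtin_clz(val) ^ 31` equals `31 - __builtin_clz(val)`: for any nonzero 32-bit value the leading-zero count lies in [0, 31], and subtracting a value in that range from 31 (binary 11111) never borrows, so the subtraction degenerates to XOR on the low 5 bits; the XOR form is easier for compilers to fold into the raw BSR output. A minimal standalone sketch verifying the identity (assumes GCC or Clang; not part of the patch):

```c
#include <assert.h>

/* For 0 <= x <= 31, 31 - x == 31 ^ x: 31 is 0b11111, so the
 * subtraction never borrows and acts as bitwise NOT of the low 5 bits. */
static unsigned highbit_sub(unsigned v) { return 31 - __builtin_clz(v); }
static unsigned highbit_xor(unsigned v) { return __builtin_clz(v) ^ 31; }

int main(void) {
    unsigned v;
    for (v = 1; v != 0; v <<= 1)            /* every power of two */
        assert(highbit_sub(v) == highbit_xor(v));
    assert(highbit_xor(0xFFFFFFFFu) == 31);
    return 0;
}
```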
@@ -244,9 +244,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
 {
     size_t const nbBytes = bitC->bitPos >> 3;
     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
-    assert(bitC->ptr <= bitC->endPtr);
     bitC->bitPos &= 7;
     bitC->bitContainer >>= nbBytes*8;
 }
@@ -260,6 +260,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
 {
     size_t const nbBytes = bitC->bitPos >> 3;
     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
     if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
...
@@ -61,6 +61,13 @@
 # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
 #endif
 
+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#if defined(__GNUC__)
+# define UNUSED_ATTR __attribute__((unused))
+#else
+# define UNUSED_ATTR
+#endif
+
 /* force no inlining */
 #ifdef _MSC_VER
 # define FORCE_NOINLINE static __declspec(noinline)
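Typical use of the new UNUSED_ATTR (this usage example is illustrative, not part of the patch): it silences -Wunused-function for helpers defined in headers but not called from every translation unit; the `HINT_INLINE UNUSED_ATTR` pair later in this commit applies it to ZSTD_storeSeq for exactly that reason.

```c
/* Without UNUSED_ATTR, -Wunused-function fires in any .c file that
 * includes the header but never calls the helper. */
static UNUSED_ATTR int add_one(int x) { return x + 1; }
```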
@@ -127,9 +134,14 @@
     }                                     \
 }
 
-/* vectorization */
+/* vectorization
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
 #if !defined(__clang__) && defined(__GNUC__)
-# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
+#  define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+# else
+#  define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
+# endif
 #else
 # define DONT_VECTORIZE
 #endif
...
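The split matters because `__attribute__((optimize(...)))` only exists from gcc-4.4 onward; earlier compilers need the `_Pragma` operator form instead. The two spellings also attach differently: the attribute binds to the one function it annotates, while the pragma changes optimization state for everything that follows it in the file. A hedged sketch of both forms (assumes a GCC-family compiler; function names are illustrative):

```c
/* gcc >= 4.4: per-function attribute, scoped to this definition only */
__attribute__((optimize("no-tree-vectorize")))
void copy_attr(char* d, const char* s, long n) {
    long i;
    for (i = 0; i < n; i++) d[i] = s[i];
}

/* pre-4.4 gcc: pragma form; affects functions defined after this point */
_Pragma("GCC optimize(\"no-tree-vectorize\")")
void copy_pragma(char* d, const char* s, long n) {
    long i;
    for (i = 0; i < n; i++) d[i] = s[i];
}
```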
@@ -308,7 +308,7 @@ If there is an error, the function will return an error code, which can be teste
 *******************************************/
 /* FSE buffer bounds */
 #define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
 
 /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
...
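The widened bound accounts for the encoder's final flush: on top of the size + size/128 worst case for the compressed bits themselves, closing the bitstream writes out the two FSE states (up to 4 bytes together, per the macro's own comment) plus one last bitContainer flush of up to sizeof(size_t) bytes. Worked numbers, assuming a 64-bit size_t:

```c
#include <stdio.h>

#define FSE_BLOCKBOUND_OLD(size) (size + (size>>7))
#define FSE_BLOCKBOUND_NEW(size) (size + (size>>7) + 4 + sizeof(size_t))

int main(void) {
    size_t const size = 1000;
    /* old: 1000 + 7         = 1007
     * new: 1000 + 7 + 4 + 8 = 1019 on a 64-bit target */
    printf("old bound: %zu\n", (size_t)FSE_BLOCKBOUND_OLD(size));
    printf("new bound: %zu\n", (size_t)FSE_BLOCKBOUND_NEW(size));
    return 0;
}
```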
@@ -52,7 +52,9 @@
 #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
 
 /* check and forward error code */
+#ifndef CHECK_F
 #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
+#endif
 
 /* **************************************************************
...
@@ -47,6 +47,79 @@ extern "C" {
 #define MEM_STATIC_ASSERT(c)   { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
 
+/* detects whether we are being compiled under msan */
+#if defined (__has_feature)
+# if __has_feature(memory_sanitizer)
+#  define MEMORY_SANITIZER 1
+# endif
+#endif
+
+#if defined (MEMORY_SANITIZER)
+/* Not all platforms that support msan provide sanitizers/msan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+
+#include <stdint.h> /* intptr_t */
+
+/* Make memory region fully initialized (without changing its contents). */
+void __msan_unpoison(const volatile void *a, size_t size);
+
+/* Make memory region fully uninitialized (without changing its contents).
+   This is a legacy interface that does not update origin information. Use
+   __msan_allocated_memory() instead. */
+void __msan_poison(const volatile void *a, size_t size);
+
+/* Returns the offset of the first (at least partially) poisoned byte in the
+   memory range, or -1 if the whole range is good. */
+intptr_t __msan_test_shadow(const volatile void *x, size_t size);
+#endif
+
+/* detects whether we are being compiled under asan */
+#if defined (__has_feature)
+# if __has_feature(address_sanitizer)
+#  define ADDRESS_SANITIZER 1
+# endif
+#elif defined(__SANITIZE_ADDRESS__)
+# define ADDRESS_SANITIZER 1
+#endif
+
+#if defined (ADDRESS_SANITIZER)
+/* Not all platforms that support asan provide sanitizers/asan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
+ *
+ * This memory must be previously allocated by your program. Instrumented
+ * code is forbidden from accessing addresses in this region until it is
+ * unpoisoned. This function is not guaranteed to poison the entire region -
+ * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
+ * alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can poison or
+ * unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_poison_memory_region(void const volatile *addr, size_t size);
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
+ *
+ * This memory must be previously allocated by your program. Accessing
+ * addresses in this region is allowed until this region is poisoned again.
+ * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
+ * to ASan alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can
+ * poison or unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+#endif
+
 /*-**************************************************************
 *  Basic Types
...
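These prototypes mirror the ones in sanitizer/msan_interface.h and sanitizer/asan_interface.h; declaring them locally avoids a hard dependency on headers that not every toolchain ships. A hedged sketch of how a workspace allocator can use them (the `__asan_*`/`__msan_*` calls are the interfaces declared above; the wrapper functions and scenario are hypothetical):

```c
#include <stddef.h>

/* Illustrative only: poison a scratch region while it is logically free,
 * unpoison it when handing it out, so stray accesses are caught. */
static void* workspace_reserve(void* base, size_t size) {
#if defined (ADDRESS_SANITIZER)
    __asan_unpoison_memory_region(base, size);   /* region may be touched again */
#endif
#if defined (MEMORY_SANITIZER)
    __msan_poison(base, size);                   /* but its contents are undefined */
#endif
    (void)size;
    return base;
}

static void workspace_release(void* base, size_t size) {
#if defined (ADDRESS_SANITIZER)
    __asan_poison_memory_region(base, size);     /* any further access is a bug */
#endif
    (void)base; (void)size;
}
```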
@@ -127,9 +127,13 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
     ctx->queueTail = 0;
     ctx->numThreadsBusy = 0;
     ctx->queueEmpty = 1;
-    (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
-    (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
-    (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+    {
+        int error = 0;
+        error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
+        error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
+        error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+        if (error) { POOL_free(ctx); return NULL; }
+    }
     ctx->shutdown = 0;
     /* Allocate space for the thread handles */
     ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
...
@@ -14,6 +14,8 @@
  * This file will hold wrapper for systems, which do not support pthreads
  */
 
+#include "threading.h"
+
 /* create fake symbol to avoid empty translation unit warning */
 int g_ZSTD_threading_useless_symbol;
 
@@ -28,7 +30,6 @@ int g_ZSTD_threading_useless_symbol;
 /* ===  Dependencies  === */
 #include <process.h>
 #include <errno.h>
-#include "threading.h"
 
 /* ===  Implementation  === */
 
@@ -73,3 +74,47 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
 }
 
 #endif   /* ZSTD_MULTITHREAD */
+
+#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
+
+#include <stdlib.h>
+
+int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
+{
+    *mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
+    if (!*mutex)
+        return 1;
+    return pthread_mutex_init(*mutex, attr);
+}
+
+int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
+{
+    if (!*mutex)
+        return 0;
+    {
+        int const ret = pthread_mutex_destroy(*mutex);
+        free(*mutex);
+        return ret;
+    }
+}
+
+int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
+{
+    *cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
+    if (!*cond)
+        return 1;
+    return pthread_cond_init(*cond, attr);
+}
+
+int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
+{
+    if (!*cond)
+        return 0;
+    {
+        int const ret = pthread_cond_destroy(*cond);
+        free(*cond);
+        return ret;
+    }
+}
+
+#endif
@@ -13,6 +13,8 @@
 #ifndef THREADING_H_938743
 #define THREADING_H_938743
 
+#include "debug.h"
+
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
@@ -75,10 +77,12 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
  */
 
 #elif defined(ZSTD_MULTITHREAD)   /* posix assumed ; need a better detection method */
 /* ===   POSIX Systems   === */
 #  include <pthread.h>
 
+#if DEBUGLEVEL < 1
+
 #define ZSTD_pthread_mutex_t            pthread_mutex_t
 #define ZSTD_pthread_mutex_init(a, b)   pthread_mutex_init((a), (b))
 #define ZSTD_pthread_mutex_destroy(a)   pthread_mutex_destroy((a))
 
@@ -96,6 +100,33 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
 #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
 #define ZSTD_pthread_join(a, b)         pthread_join((a),(b))
 
+#else /* DEBUGLEVEL >= 1 */
+
+/* Debug implementation of threading.
+ * In this implementation we use pointers for mutexes and condition variables.
+ * This way, if we forget to init/destroy them the program will crash or ASAN
+ * will report leaks.
+ */
+
+#define ZSTD_pthread_mutex_t            pthread_mutex_t*
+int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr);
+int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex);
+#define ZSTD_pthread_mutex_lock(a)      pthread_mutex_lock(*(a))
+#define ZSTD_pthread_mutex_unlock(a)    pthread_mutex_unlock(*(a))
+
+#define ZSTD_pthread_cond_t             pthread_cond_t*
+int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr);
+int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
+#define ZSTD_pthread_cond_wait(a, b)    pthread_cond_wait(*(a), *(b))
+#define ZSTD_pthread_cond_signal(a)     pthread_cond_signal(*(a))
+#define ZSTD_pthread_cond_broadcast(a)  pthread_cond_broadcast(*(a))
+
+#define ZSTD_pthread_t                  pthread_t
+#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
+#define ZSTD_pthread_join(a, b)         pthread_join((a),(b))
+
+#endif
+
 #else  /* ZSTD_MULTITHREAD not defined */
 /* No multithreading support */
...
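A sketch of the failure modes this debug mode is designed to surface (illustrative; assumes a POSIX build with DEBUGLEVEL >= 1):

```c
/* With DEBUGLEVEL >= 1, ZSTD_pthread_mutex_t is a pointer that is only
 * non-NULL after a successful init, so lifecycle bugs become visible:
 *
 *   ZSTD_pthread_mutex_t m;            // uninitialized pointer
 *   ZSTD_pthread_mutex_lock(&m);       // dereferences garbage -> crash,
 *                                      // instead of silently "working"
 *
 *   ZSTD_pthread_mutex_init(&m, NULL); // mallocs the real mutex
 *   ...
 *   // forgetting ZSTD_pthread_mutex_destroy(&m) leaks the allocation,
 *   // which ASAN/LSAN then report at exit.
 */
```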
@@ -197,79 +197,56 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
 static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
 #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
 
-#define WILDCOPY_OVERLENGTH 8
-#define VECLEN 16
+#define WILDCOPY_OVERLENGTH 32
+#define WILDCOPY_VECLEN 16
 
 typedef enum {
     ZSTD_no_overlap,
-    ZSTD_overlap_src_before_dst,
+    ZSTD_overlap_src_before_dst
     /*  ZSTD_overlap_dst_before_src, */
 }  ZSTD_overlap_e;
 
 /*! ZSTD_wildcopy() :
- *  custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
+ *  Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
+ *           The src buffer must be before the dst buffer.
+ */
 MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
 {
     ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
     BYTE* const oend = op + length;
 
-    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
-
-    if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
-      do
-          COPY8(op, ip)
-      while (op < oend);
-    }
-    else {
-      if ((length & 8) == 0)
-          COPY8(op, ip);
-      do {
-        COPY16(op, ip);
-      }
-      while (op < oend);
-    }
-}
-
-/*! ZSTD_wildcopy_16min() :
- *  same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
-MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
-{
-    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)dst;
-    BYTE* const oend = op + length;
-
-    assert(length >= 8);
-    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
-
-    if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
-      do
-          COPY8(op, ip)
-      while (op < oend);
-    }
-    else {
-      if ((length & 8) == 0)
-          COPY8(op, ip);
-      do {
-        COPY16(op, ip);
-      }
-      while (op < oend);
-    }
-}
-
-MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* should be faster for decoding, but strangely, not verified on all platform */
-{
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)dst;
-    BYTE* const oend = (BYTE*)dstEnd;
-    do
-        COPY8(op, ip)
-    while (op < oend);
-}
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
+
+    if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+        /* Handle short offset copies. */
+        do {
+            COPY8(op, ip)
+        } while (op < oend);
+    } else {
+        assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+        /* Separate out the first two COPY16() calls because the copy length is
+         * almost certain to be short, so the branches have different
+         * probabilities.
+         * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
+         * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
+         */
+        COPY16(op, ip);
+        COPY16(op, ip);
+        if (op >= oend) return;
+        do {
+            COPY16(op, ip);
+            COPY16(op, ip);
+        }
+        while (op < oend);
+    }
+}
 
 /*-*******************************************
 *  Private declarations
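The contract change is the important part: callers of ZSTD_wildcopy must now guarantee either a gap of at least WILDCOPY_VECLEN (16) bytes between src and dst, or pass ZSTD_overlap_src_before_dst with src at least 8 bytes before dst; in exchange the copy may slop up to WILDCOPY_OVERLENGTH (now 32) bytes past the requested length, so destination buffers need that much headroom. A hedged usage sketch (buffer names and sizes are illustrative):

```c
#include <string.h>

#define LEN 100

void wildcopy_demo(void) {
    char src[LEN + WILDCOPY_OVERLENGTH];
    char dst[LEN + 8 + WILDCOPY_OVERLENGTH];   /* headroom for the overcopy */
    memset(src, 'a', sizeof(src));

    /* distinct buffers: the caller asserts they are >= WILDCOPY_VECLEN apart */
    ZSTD_wildcopy(dst, src, LEN, ZSTD_no_overlap);

    /* overlapping forward copy at offset 8: the LZ match-copy case;
     * legal because the source (dst) is at least 8 bytes before dst+8,
     * and it replicates the leading 8-byte pattern forward. */
    ZSTD_wildcopy(dst + 8, dst, LEN, ZSTD_overlap_src_before_dst);
}
```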
@@ -323,7 +300,7 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus
     _BitScanReverse(&r, val);
     return (unsigned)r;
 # elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
-    return 31 - __builtin_clz(val);
+    return __builtin_clz (val) ^ 31;
 # elif defined(__ICCARM__)    /* IAR Intrinsic */
     return 31 - __CLZ(val);
 # else  /* Software version */
...
@@ -19,6 +19,7 @@
 *  Dependencies
 ***************************************/
 #include "zstd_internal.h"
+#include "zstd_cwksp.h"
 #ifdef ZSTD_MULTITHREAD
 # include "zstdmt_compress.h"
 #endif
@@ -192,6 +193,13 @@ typedef struct {
     size_t capacity;     /* The capacity starting from `seq` pointer */
 } rawSeqStore_t;
 
+typedef struct {
+    int collectSequences;
+    ZSTD_Sequence* seqStart;
+    size_t seqIndex;
+    size_t maxSequences;
+} SeqCollector;
+
 struct ZSTD_CCtx_params_s {
     ZSTD_format_e format;
     ZSTD_compressionParameters cParams;
@@ -203,6 +211,9 @@ struct ZSTD_CCtx_params_s {
     size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                 * No target when targetCBlockSize == 0.
                                 * There is no guarantee on compressed block size */
+    int srcSizeHint;           /* User's best guess of source size.
+                                * Hint is not valid when srcSizeHint == 0.
+                                * There is no guarantee that hint is close to actual source size */
 
     ZSTD_dictAttachPref_e attachDictPref;
     ZSTD_literalCompressionMode_e literalCompressionMode;
@@ -228,9 +239,7 @@ struct ZSTD_CCtx_s {
     ZSTD_CCtx_params appliedParams;
     U32   dictID;
 
-    int workSpaceOversizedDuration;
-    void* workSpace;
-    size_t workSpaceSize;
+    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
     size_t blockSize;
     unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
     unsigned long long consumedSrcSize;
@@ -238,6 +247,8 @@ struct ZSTD_CCtx_s {
     XXH64_state_t xxhState;
     ZSTD_customMem customMem;
     size_t staticSize;
+    SeqCollector seqCollector;
+    int isFirstBlock;
 
     seqStore_t seqStore;      /* sequences storage ptrs */
     ldmState_t ldmState;      /* long distance matching state */
@@ -337,26 +348,57 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
     return (srcSize >> minlog) + 2;
 }
 
+/*! ZSTD_safecopyLiterals() :
+ *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
+ *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
+ *  large copies.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+    assert(iend > ilimit_w);
+    if (ip <= ilimit_w) {
+        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
+        op += ilimit_w - ip;
+        ip = ilimit_w;
+    }
+    while (ip < iend) *op++ = *ip++;
+}
+
 /*! ZSTD_storeSeq() :
- *  Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
- *  `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
+ *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
 *  `mlBase` : matchLength - MINMATCH
+ *  Allowed to overread literals up to litLimit.
 */
-MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
+HINT_INLINE UNUSED_ATTR
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
 {
+    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
+    BYTE const* const litEnd = literals + litLength;
 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
     static const BYTE* g_start = NULL;
     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
         DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
-               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
+               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
     }
 #endif
     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
     /* copy Literals */
     assert(seqStorePtr->maxNbLit <= 128 KB);
     assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
-    ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap);
+    assert(literals + litLength <= litLimit);
+    if (litEnd <= litLimit_w) {
+        /* Common case we can use wildcopy.
+         * First copy 16 bytes, because literals are likely short.
+         */
+        assert(WILDCOPY_OVERLENGTH >= 16);
+        ZSTD_copy16(seqStorePtr->lit, literals);
+        if (litLength > 16) {
+            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+        }
+    } else {
+        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
+    }
     seqStorePtr->lit += litLength;
 
     /* literal Length */
@@ -368,7 +410,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
     seqStorePtr->sequences[0].litLength = (U16)litLength;
 
     /* match offset */
-    seqStorePtr->sequences[0].offset = offsetCode + 1;
+    seqStorePtr->sequences[0].offset = offCode + 1;
 
     /* match Length */
     if (mlBase>0xFFFF) {
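The new litLimit parameter is what lets ZSTD_storeSeq keep using wildcopy: as long as the literal run ends at least WILDCOPY_OVERLENGTH bytes before the end of the input window (litEnd <= litLimit_w), the 32-byte overcopy cannot read out of bounds, and only the rare sequences near the input end fall back to ZSTD_safecopyLiterals' byte loop. A restatement of that decision on plain buffers (illustrative only; function name is hypothetical):

```c
/* Restates the fast/safe split inside ZSTD_storeSeq. */
void copy_literals(BYTE* out, const BYTE* lit, size_t litLength,
                   const BYTE* inputEnd /* == litLimit */)
{
    const BYTE* const limit_w = inputEnd - WILDCOPY_OVERLENGTH;
    if (lit + litLength <= limit_w) {
        /* common case: overreading up to 32 bytes stays inside the input */
        ZSTD_copy16(out, lit);                    /* most literal runs are short */
        if (litLength > 16)
            ZSTD_wildcopy(out+16, lit+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
    } else {
        /* within 32 bytes of the input end: no overread allowed */
        ZSTD_safecopyLiterals(out, lit, lit + litLength, limit_w);
    }
}
```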
@@ -910,7 +952,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
-                    ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
+                    const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
 
 void ZSTD_resetSeqStore(seqStore_t* ssPtr);
 
@@ -925,7 +967,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                     ZSTD_dictContentType_e dictContentType,
                                     ZSTD_dictTableLoadMethod_e dtlm,
                                     const ZSTD_CDict* cdict,
-                                    ZSTD_CCtx_params params,
+                                    const ZSTD_CCtx_params* params,
                                     unsigned long long pledgedSrcSize);
 
 /* ZSTD_compress_advanced_internal() :
@@ -934,7 +976,7 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                        void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize,
                                  const void* dict,size_t dictSize,
-                                 ZSTD_CCtx_params params);
+                                 const ZSTD_CCtx_params* params);
 
 /* ZSTD_writeLastEmptyBlock() :
...
@@ -70,7 +70,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                               ZSTD_strategy strategy, int disableLiteralCompression,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
-                              void* workspace, size_t wkspSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
                               const int bmi2)
 {
     size_t const minGain = ZSTD_minGain(srcSize, strategy);
@@ -99,10 +99,15 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
     {   HUF_repeat repeat = prevHuf->repeatMode;
         int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
         if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
-        cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
-                                : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+        cLitSize = singleStream ?
+            HUF_compress1X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                255, 11, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+            HUF_compress4X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                255, 11, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
         if (repeat != HUF_repeat_none) {
             /* reused the existing table */
             hType = set_repeat;
...
@@ -23,7 +23,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                               ZSTD_strategy strategy, int disableLiteralCompression,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
-                              void* workspace, size_t wkspSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
                               const int bmi2);
 
 #endif /* ZSTD_COMPRESS_LITERALS_H */
@@ -222,7 +222,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
                  const BYTE* codeTable, size_t nbSeq,
                  const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                  const FSE_CTable* prevCTable, size_t prevCTableSize,
-                 void* workspace, size_t workspaceSize)
+                 void* entropyWorkspace, size_t entropyWorkspaceSize)
 {
     BYTE* op = (BYTE*)dst;
     const BYTE* const oend = op + dstCapacity;
@@ -238,7 +238,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
         memcpy(nextCTable, prevCTable, prevCTableSize);
         return 0;
     case set_basic:
-        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize));  /* note : could be pre-calculated */
+        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize));  /* note : could be pre-calculated */
         return 0;
     case set_compressed: {
         S16 norm[MaxSeq + 1];
@@ -252,7 +252,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
         FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
         {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
             FORWARD_IF_ERROR(NCountSize);
-            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
+            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize));
             return NCountSize;
         }
     }
...
@@ -35,7 +35,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
                  const BYTE* codeTable, size_t nbSeq,
                  const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                  const FSE_CTable* prevCTable, size_t prevCTableSize,
-                 void* workspace, size_t workspaceSize);
+                 void* entropyWorkspace, size_t entropyWorkspaceSize);
 
 size_t ZSTD_encodeSequences(
             void* dst, size_t dstCapacity,
...
@@ -148,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
             const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
             goto _match_stored;
         }
@@ -157,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
             && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
             mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
             goto _match_stored;
         }
@@ -247,7 +247,7 @@ _match_found:
         offset_2 = offset_1;
         offset_1 = offset;
-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
 
 _match_stored:
         /* match found */
@@ -278,7 +278,7 @@ _match_stored:
                     const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                     ip += repLength2;
@@ -297,7 +297,7 @@ _match_stored:
                 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
                 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
-                ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
                 ip += rLength;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
@@ -411,7 +411,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
         } else {
             if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -422,7 +422,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
 
             } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -447,7 +447,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 }
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
 
             } else {
                 ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -479,7 +479,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                     ip += repLength2;
...
@@ -810,7 +810,7 @@ ZSTD_compressBlock_lazy_generic(
         /* store sequence */
 _storeSequence:
         {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
             anchor = ip = start + matchLength;
         }
@@ -828,7 +828,7 @@ _storeSequence:
                 const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
                 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
-                ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                 ip += matchLength;
                 anchor = ip;
                 continue;
@@ -843,7 +843,7 @@ _storeSequence:
             /* store sequence */
             matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
             offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap repcodes */
-            ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
             ip += matchLength;
             anchor = ip;
             continue;   /* faster when present ... (?) */
@@ -1051,7 +1051,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
         /* store sequence */
 _storeSequence:
         {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
             anchor = ip = start + matchLength;
         }
@@ -1066,7 +1066,7 @@ _storeSequence:
             const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
             matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
             offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
-            ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
             ip += matchLength;
             anchor = ip;
             continue;   /* faster when present ... (?) */
...
@@ -49,9 +49,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
 {
     size_t const ldmHSize = ((size_t)1) << params.hashLog;
     size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
-    size_t const ldmBucketSize =
-        ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
-    size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
     return params.enableLdm ? totalSize : 0;
 }
@@ -583,7 +583,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
             rep[i] = rep[i-1];
         rep[0] = sequence.offset;
         /* Store the sequence */
-        ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+        ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                       sequence.offset + ZSTD_REP_MOVE,
                       sequence.matchLength - MINMATCH);
         ip += sequence.matchLength;
...
@@ -1098,7 +1098,7 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
 
                 assert(anchor + llen <= iend);
                 ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
-                ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
+                ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
                 anchor += advance;
                 ip = anchor;
     }   }
...
@@ -668,7 +668,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
 
     /* init */
     if (job->cdict) {
-        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
+        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
         assert(job->firstJob);  /* only allowed for first job */
         if (ZSTD_isError(initError)) JOB_ERROR(initError);
     } else {  /* srcStart points at reloaded section */
@@ -680,7 +680,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
                                         job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
                                         ZSTD_dtlm_fast,
                                         NULL, /*cdict*/
-                                        jobParams, pledgedSrcSize);
+                                        &jobParams, pledgedSrcSize);
         if (ZSTD_isError(initError)) JOB_ERROR(initError);
     }   }
@@ -927,12 +927,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
     unsigned jobID;
     DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
     for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
+        /* Copy the mutex/cond out */
+        ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
+        ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
+
         DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
         ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
-        mtctx->jobs[jobID].dstBuff = g_nullBuffer;
-        mtctx->jobs[jobID].cSize = 0;
+
+        /* Clear the job description, but keep the mutex/cond */
+        memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
+        mtctx->jobs[jobID].job_mutex = mutex;
+        mtctx->jobs[jobID].job_cond = cond;
     }
-    memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
     mtctx->inBuff.buffer = g_nullBuffer;
     mtctx->inBuff.filled = 0;
     mtctx->allJobsCompleted = 1;
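This rewrite exists because the job's mutex and condition variable are init-once resources: zeroing the whole jobs array with memset (as the old code did) would leak the pointer-based debug-mode allocations introduced earlier in this commit and leave the primitives uninitialized. Saving the handles, clearing the record, and writing them back is the general pattern; a hedged sketch with a hypothetical struct:

```c
#include <string.h>

/* Generic form of the save/clear/restore idiom used above (illustrative). */
typedef struct {
    ZSTD_pthread_mutex_t mutex;   /* lives as long as the owning context */
    ZSTD_pthread_cond_t  cond;
    /* ... per-job fields that must be reset between jobs ... */
} Job;

static void job_reset(Job* job) {
    ZSTD_pthread_mutex_t const mutex = job->mutex;  /* copy handles out */
    ZSTD_pthread_cond_t  const cond  = job->cond;
    memset(job, 0, sizeof(*job));                   /* clear everything else */
    job->mutex = mutex;                             /* put handles back */
    job->cond  = cond;
}
```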
...@@ -1028,9 +1034,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, ...@@ -1028,9 +1034,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
/* Sets parameters relevant to the compression job, /* Sets parameters relevant to the compression job,
* initializing others to default values. */ * initializing others to default values. */
static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
{ {
ZSTD_CCtx_params jobParams = params; ZSTD_CCtx_params jobParams = *params;
/* Clear parameters related to multithreading */ /* Clear parameters related to multithreading */
jobParams.forceWindow = 0; jobParams.forceWindow = 0;
jobParams.nbWorkers = 0; jobParams.nbWorkers = 0;
...@@ -1151,16 +1157,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx) ...@@ -1151,16 +1157,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
/* ===== Multi-threaded compression ===== */ /* ===== Multi-threaded compression ===== */
/* ------------------------------------------ */ /* ------------------------------------------ */
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params) static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
{ {
unsigned jobLog; unsigned jobLog;
if (params.ldmParams.enableLdm) { if (params->ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized. /* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize * In which case, it's preferable to determine the jobSize
* based on chainLog instead. */ * based on chainLog instead. */
jobLog = MAX(21, params.cParams.chainLog + 4); jobLog = MAX(21, params->cParams.chainLog + 4);
} else { } else {
jobLog = MAX(20, params.cParams.windowLog + 2); jobLog = MAX(20, params->cParams.windowLog + 2);
} }
return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
} }
...@@ -1193,27 +1199,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat) ...@@ -1193,27 +1199,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
return ovlog; return ovlog;
} }
static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params) static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
{ {
int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy); int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog); int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
assert(0 <= overlapRLog && overlapRLog <= 8); assert(0 <= overlapRLog && overlapRLog <= 8);
if (params.ldmParams.enableLdm) { if (params->ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized. /* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize * In which case, it's preferable to determine the jobSize
* based on chainLog instead. * based on chainLog instead.
* Then, ovLog becomes a fraction of the jobSize, rather than windowSize */ * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
- overlapRLog; - overlapRLog;
} }
assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
DEBUGLOG(4, "overlapLog : %i", params.overlapLog); DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
DEBUGLOG(4, "overlap size : %i", 1 << ovLog); DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
return (ovLog==0) ? 0 : (size_t)1 << ovLog; return (ovLog==0) ? 0 : (size_t)1 << ovLog;
} }
static unsigned static unsigned
ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
{ {
assert(nbWorkers>0); assert(nbWorkers>0);
{ size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params); { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
...@@ -1236,9 +1242,9 @@ static size_t ZSTDMT_compress_advanced_internal( ...@@ -1236,9 +1242,9 @@ static size_t ZSTDMT_compress_advanced_internal(
const ZSTD_CDict* cdict, const ZSTD_CDict* cdict,
ZSTD_CCtx_params params) ZSTD_CCtx_params params)
{ {
ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params); ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
size_t const overlapSize = ZSTDMT_computeOverlapSize(params); size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers); unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs; size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */ size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
const char* const srcStart = (const char*)src; const char* const srcStart = (const char*)src;
...@@ -1256,7 +1262,7 @@ static size_t ZSTDMT_compress_advanced_internal( ...@@ -1256,7 +1262,7 @@ static size_t ZSTDMT_compress_advanced_internal(
ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode"); DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams); if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
} }
assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */ assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
@@ -1404,12 +1410,12 @@ size_t ZSTDMT_initCStream_internal(
     mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
     if (mtctx->singleBlockingThread) {
-        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
+        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
         DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
         assert(singleThreadParams.nbWorkers == 0);
         return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
                                          dict, dictSize, cdict,
-                                         singleThreadParams, pledgedSrcSize);
+                                         &singleThreadParams, pledgedSrcSize);
     }
     DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@@ -1435,11 +1441,11 @@ size_t ZSTDMT_initCStream_internal(
         mtctx->cdict = cdict;
     }
-    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
+    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
     DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
     mtctx->targetSectionSize = params.jobSize;
     if (mtctx->targetSectionSize == 0) {
-        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
     }
     assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
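The recurring change across these ZSTDMT hunks is that ZSTD_CCtx_params is now handed around by const pointer rather than by value; the struct is comparatively large, so by-value calls copied it on every invocation. A minimal sketch of the pattern, with a hypothetical stand-in for the internal type:

#include <stddef.h>

/* Hypothetical stand-in for a large parameter struct such as
 * ZSTD_CCtx_params (the real layout is internal to zstd). */
typedef struct { int cParams[16]; int nbWorkers; size_t jobSize; } big_params;

/* Before: the whole struct is copied at every call. */
static size_t job_size_byvalue(big_params p) { return p.jobSize; }

/* After: only a pointer is passed; `const` keeps the same
 * read-only contract the by-value version had. */
static size_t job_size_byref(const big_params* p) { return p->jobSize; }

int main(void)
{
    big_params params = { {0}, 4, 1u << 20 };
    return job_size_byvalue(params) == job_size_byref(&params) ? 0 : 1;
}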
...
@@ -61,7 +61,9 @@
  * Error Management
  ****************************************************************/
 #define HUF_isError ERR_isError
+#ifndef CHECK_F
 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
+#endif
 /* **************************************************************
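The new #ifndef guard presumably lets this header coexist with other definitions of CHECK_F elsewhere in the tree (or in amalgamated single-file builds) without a macro-redefinition error. For reference, what the macro buys at call sites; the error convention and helpers below are stubs invented so the sketch compiles standalone:

#include <stddef.h>

/* Stub error convention; real zstd code uses ERR_isError() instead. */
static int is_error(size_t code) { return code == (size_t)-1; }

#ifndef CHECK_F   /* the guard: a prior definition, if any, wins */
#define CHECK_F(f) { size_t const err_ = (f); if (is_error(err_)) return err_; }
#endif

static size_t step_ok(void)   { return 0; }
static size_t step_fail(void) { return (size_t)-1; }

/* CHECK_F collapses "call, test, early-return" into one line per step. */
static size_t run_pipeline(void)
{
    CHECK_F(step_ok());
    CHECK_F(step_fail());   /* propagates (size_t)-1 to the caller */
    return 0;
}

int main(void) { return run_pipeline() == 0 ? 0 : 1; }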
...
@@ -88,10 +88,7 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
 static size_t ZSTD_startingInputLength(ZSTD_format_e format)
 {
-    size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
-                    ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
-                    ZSTD_FRAMEHEADERSIZE_PREFIX;
-    ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
+    size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
     /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
     assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
     return startingInputLength;
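The branch and static assert could be dropped because ZSTD_FRAMEHEADERSIZE_PREFIX is now a function-like macro taking the format. A compilable exhibit of the scheme; the enum is reduced to a stand-in, and the values match what zstd.h 1.4.4's experimental section defines (a magicless frame simply omits the 4-byte magic number, hence 5 - 4 == 1):

/* Stand-in enum so this compiles outside zstd; values as in zstd.h. */
typedef enum { ZSTD_f_zstd1 = 0, ZSTD_f_zstd1_magicless = 1 } ZSTD_format_e;

/* Format-aware sizes, as defined in zstd.h 1.4.4 (experimental section). */
#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1)
#define ZSTD_FRAMEHEADERSIZE_MIN(format)    ((format) == ZSTD_f_zstd1 ? 6 : 2)

int main(void)
{
    return ZSTD_FRAMEHEADERSIZE_PREFIX(ZSTD_f_zstd1_magicless) == 1 ? 0 : 1;
}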
@@ -376,7 +373,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
 {
     unsigned long long totalDstSize = 0;
-    while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+    while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
         U32 const magicNumber = MEM_readLE32(src);
         if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
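ZSTD_findDecompressedSize keeps an explicit ZSTD_f_zstd1 in the loop condition: walking concatenated frames depends on reading magic numbers (including skippable-frame detection), which the magicless format does not carry. A minimal usage sketch of this experimental API, building one frame first so it is self-contained:

#include <stdio.h>
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_findDecompressedSize is experimental */
#include <zstd.h>

int main(void)
{
    const char msg[] = "concatenated-frames example";
    char frame[128];
    size_t const fSize = ZSTD_compress(frame, sizeof frame, msg, sizeof msg, 1);
    if (ZSTD_isError(fSize)) return 1;

    /* Sums the decompressed sizes recorded in every frame header. */
    unsigned long long const total = ZSTD_findDecompressedSize(frame, fSize);
    if (total == ZSTD_CONTENTSIZE_UNKNOWN || total == ZSTD_CONTENTSIZE_ERROR)
        return 1;
    printf("decompressed size: %llu\n", total);   /* prints sizeof msg, 28 */
    return 0;
}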
@@ -629,11 +626,12 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
     /* check */
     RETURN_ERROR_IF(
-        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize,
+        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
         srcSize_wrong);
     /* Frame Header */
-    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
+    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
+            ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
         if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
         RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
                         srcSize_wrong);
@@ -714,7 +712,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
         dictSize = ZSTD_DDict_dictSize(ddict);
     }
-    while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+    while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
         if (ZSTD_isLegacy(src, srcSize)) {
@@ -1098,7 +1096,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
         size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
         for (i=0; i<3; i++) {
             U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
-            RETURN_ERROR_IF(rep==0 || rep >= dictContentSize,
+            RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
                             dictionary_corrupted);
             entropy->rep[i] = rep;
     }   }
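The relaxed comparison fixes an off-by-one: a repeat offset may legitimately reference the very first byte of the dictionary content, i.e. rep == dictContentSize is valid, so only rep > dictContentSize indicates corruption. A small worked check:

#include <assert.h>
#include <stddef.h>

/* With 100 bytes of dictionary content, repeat offsets 1..100 are
 * addressable, so only rep == 0 and rep > 100 are corrupt. */
static int rep_is_valid(unsigned rep, size_t dictContentSize)
{
    return rep != 0 && (size_t)rep <= dictContentSize;   /* the new check */
}

int main(void)
{
    assert( rep_is_valid(100, 100));   /* rejected by the old `>=` test */
    assert(!rep_is_valid(101, 100));
    assert(!rep_is_valid(0,   100));
    return 0;
}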
@@ -1267,7 +1265,7 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
 {
     RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
     ZSTD_clearDict(dctx);
-    if (dict && dictSize >= 8) {
+    if (dict && dictSize != 0) {
         dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
         RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
         dctx->ddict = dctx->ddictLocal;
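Dropping the 8-byte floor means a very short dictionary is no longer silently ignored; too short to carry the structured dictionary header, it is presumably loaded as raw content instead. A usage sketch against the stable API:

#include <stdio.h>
#include <zstd.h>

int main(void)
{
    /* A 3-byte "dictionary": far too short for the structured format,
     * so it can only act as raw content. */
    static const char tiny[3] = { 'a', 'b', 'c' };

    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    if (dctx == NULL) return 1;

    /* With the old `dictSize >= 8` test this call silently loaded no
     * dictionary at all; now `tiny` is registered like any other. */
    size_t const r = ZSTD_DCtx_loadDictionary(dctx, tiny, sizeof tiny);
    printf("loadDictionary: %s\n", ZSTD_isError(r) ? ZSTD_getErrorName(r) : "ok");

    ZSTD_freeDCtx(dctx);
    return 0;
}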
@@ -1300,14 +1298,14 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz
 /* ZSTD_initDStream_usingDict() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
  * this function cannot fail */
 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
 {
     DEBUGLOG(4, "ZSTD_initDStream_usingDict");
     FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
     FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    return ZSTD_startingInputLength(zds->format);
 }
 /* note : this variant can't fail */
@@ -1324,16 +1322,16 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
 {
     FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
     FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    return ZSTD_startingInputLength(dctx->format);
 }
 /* ZSTD_resetDStream() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
  * this function cannot fail */
 size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
 {
     FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    return ZSTD_startingInputLength(dctx->format);
 }
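All three initializers now derive their return value, the recommended size of the first input read, from the context's configured format, so magicless streams receive a correct hint instead of the fixed zstd1 prefix. Typical handling of that hint via the stable API (ZSTD_initDStream follows the same convention):

#include <stdio.h>
#include <zstd.h>

int main(void)
{
    ZSTD_DStream* const zds = ZSTD_createDStream();
    if (zds == NULL) return 1;

    /* The init return value is a size hint: how many input bytes to
     * feed before the next ZSTD_decompressStream() call can progress. */
    size_t const firstReadHint = ZSTD_initDStream(zds);
    printf("read at least %zu bytes first\n", firstReadHint);   /* 5 for ZSTD_f_zstd1 */

    ZSTD_freeDStream(zds);
    return 0;
}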
@@ -1564,7 +1562,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                     zds->lhSize += remainingInput;
                 }
                 input->pos = input->size;
-                return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
+                return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
             }
             assert(ip != NULL);
             memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
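Note the added (size_t) cast: ZSTD_FRAMEHEADERSIZE_MIN(format) now expands to an int-valued conditional while hSize is a size_t, and zstd's MAX() is the usual ternary macro, so the cast keeps the whole comparison in one type. The hazard in isolation, with MAX reproduced as commonly defined:

#include <stdio.h>
#include <stddef.h>

#define MAX(a,b) ((a) > (b) ? (a) : (b))   /* as in zstd_internal.h */

int main(void)
{
    size_t hSize = 18;
    int    hMin  = 6;    /* stands in for ZSTD_FRAMEHEADERSIZE_MIN(format) */

    /* A mixed int/size_t comparison draws -Wsign-compare and converts
     * the int operand implicitly; casting up front keeps the expression
     * in size_t and documents the intent. */
    size_t r = MAX((size_t)hMin, hSize);
    printf("%zu\n", r);
    return 0;
}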
...