Commit b81fa47f by Rémi Verschelde

zstd: Update to upstream version 1.4.1

parent 43793958
......@@ -200,6 +200,7 @@ Important: Some files have Godot-made changes.
They are marked with `// -- GODOT start --` and `// -- GODOT end --`
comments.
## libtheora
- Upstream: https://www.theora.org
......@@ -262,18 +263,6 @@ changes to ensure they build for Javascript/HTML5. Those
changes are marked with `// -- GODOT --` comments.
## wslay
- Upstream: https://github.com/tatsuhiro-t/wslay
- Version: 1.1.0
- License: MIT
File extracted from upstream release tarball:
- All `*.c` and `*.h` in `lib/` and `lib/includes/`
- `wslay.h` has a small Godot addition to fix MSVC build.
See `thirdparty/wslay/msvcfix.diff`
## mbedtls
- Upstream: https://tls.mbed.org/
......@@ -508,6 +497,19 @@ They can be reapplied using the patches included in the `vhacd`
folder.
## wslay
- Upstream: https://github.com/tatsuhiro-t/wslay
- Version: 1.1.0
- License: MIT
File extracted from upstream release tarball:
- All `*.c` and `*.h` in `lib/` and `lib/includes/`
- `wslay.h` has a small Godot addition to fix MSVC build.
See `thirdparty/wslay/msvcfix.diff`
## xatlas
- Upstream: https://github.com/jpcy/xatlas
......@@ -536,7 +538,7 @@ Files extracted from upstream source:
## zstd
- Upstream: https://github.com/facebook/zstd
- Version: 1.4.0
- Version: 1.4.1
- License: BSD-3-Clause
Files extracted from upstream source:
......
......@@ -127,6 +127,13 @@
} \
}
/* vectorization */
#if !defined(__clang__) && defined(__GNUC__)
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
#else
# define DONT_VECTORIZE
#endif
/* disable warnings */
#ifdef _MSC_VER /* Visual Studio */
# include <intrin.h> /* For Visual 2005 */
......
......@@ -34,7 +34,6 @@
#endif
#include "xxhash.h" /* XXH_reset, update, digest */
#if defined (__cplusplus)
extern "C" {
#endif
......@@ -193,19 +192,72 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
* Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
#define WILDCOPY_OVERLENGTH 8
#define VECLEN 16
typedef enum {
ZSTD_no_overlap,
ZSTD_overlap_src_before_dst,
/* ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;
/*! ZSTD_wildcopy() :
* custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
#define WILDCOPY_OVERLENGTH 8
MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
do
COPY8(op, ip)
while (op < oend);
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
do
COPY8(op, ip)
while (op < oend);
}
else {
if ((length & 8) == 0)
COPY8(op, ip);
do {
COPY16(op, ip);
}
while (op < oend);
}
}
/*! ZSTD_wildcopy_16min() :
* same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
assert(length >= 8);
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
do
COPY8(op, ip)
while (op < oend);
}
else {
if ((length & 8) == 0)
COPY8(op, ip);
do {
COPY16(op, ip);
}
while (op < oend);
}
}
MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */
......
......@@ -33,13 +33,13 @@ extern "C" {
***************************************/
#define kSearchStrength 8
#define HASH_READ_SIZE 8
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
It's not a big deal though : candidate will just be sorted again.
Additionally, candidate position 1 will be lost.
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
/*-*************************************
......@@ -128,21 +128,20 @@ typedef struct {
BYTE const* base; /* All regular indexes relative to this position */
BYTE const* dictBase; /* extDict indexes relative to this position */
U32 dictLimit; /* below that point, need extDict */
U32 lowLimit; /* below that point, no more data */
U32 lowLimit; /* below that point, no more valid data */
} ZSTD_window_t;
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary */
U32 loadedDictEnd; /* index of end of dictionary, within context's referential. When dict referential is copied into active context (i.e. not attached), effectively same value as dictSize, since referential starts from zero */
U32 nextToUpdate; /* index from which to continue table update */
U32 nextToUpdate3; /* index from which to continue table update */
U32 hashLog3; /* dispatch table : larger == faster, more memory */
U32* hashTable;
U32* hashTable3;
U32* chainTable;
optState_t opt; /* optimal parser state */
const ZSTD_matchState_t * dictMatchState;
const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams;
};
......@@ -195,6 +194,9 @@ struct ZSTD_CCtx_params_s {
int compressionLevel;
int forceWindow; /* force back-references to respect limit of
* 1<<wLog, even for dictionary */
size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size */
ZSTD_dictAttachPref_e attachDictPref;
ZSTD_literalCompressionMode_e literalCompressionMode;
......@@ -324,7 +326,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
/* copy Literals */
assert(seqStorePtr->maxNbLit <= 128 KB);
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength, ZSTD_no_overlap);
seqStorePtr->lit += litLength;
/* literal Length */
......@@ -564,6 +566,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
/*-*************************************
* Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
......@@ -675,31 +680,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* Updates lowLimit so that:
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
*
* This allows a simple check that index >= lowLimit to see if index is valid.
* This must be called before a block compression call, with srcEnd as the block
* source end.
* It ensures index is valid as long as index >= lowLimit.
* This must be called before a block compression call.
*
* loadedDictEnd is only defined if a dictionary is in use for current compression.
* As the name implies, loadedDictEnd represents the index at end of dictionary.
* The value lies within context's referential, it can be directly compared to blockEndIdx.
*
* If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
* This is because dictionaries are allowed to be referenced as long as the last
* byte of the dictionary is in the window, but once they are out of range,
* they cannot be referenced. If loadedDictEndPtr is NULL, we use
* loadedDictEnd == 0.
* If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
* If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
* This is because dictionaries are allowed to be referenced fully
* as long as the last byte of the dictionary is in the window.
* Once input has progressed beyond window size, dictionary cannot be referenced anymore.
*
* In normal dict mode, the dict is between lowLimit and dictLimit. In
* dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
* is below them. forceWindow and dictMatchState are therefore incompatible.
* In normal dict mode, the dictionary lies between lowLimit and dictLimit.
* In dictMatchState mode, lowLimit and dictLimit are the same,
* and the dictionary is below them.
* forceWindow and dictMatchState are therefore incompatible.
*/
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
void const* srcEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
(unsigned)blockEndIdx, (unsigned)maxDist);
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
/* - When there is no dictionary : loadedDictEnd == 0.
In which case, the test (blockEndIdx > maxDist) is merely to avoid
overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
- When there is a standard dictionary :
Index referential is copied from the dictionary,
which means it starts from 0.
In which case, loadedDictEnd == dictSize,
and it makes sense to compare `blockEndIdx > maxDist + dictSize`
since `blockEndIdx` also starts from zero.
- When there is an attached dictionary :
loadedDictEnd is expressed within the referential of the context,
so it can be directly compared against blockEndIdx.
*/
if (blockEndIdx > maxDist + loadedDictEnd) {
U32 const newLowLimit = blockEndIdx - maxDist;
if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
......@@ -708,10 +731,31 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
(unsigned)window->dictLimit, (unsigned)window->lowLimit);
window->dictLimit = window->lowLimit;
}
if (loadedDictEndPtr)
*loadedDictEndPtr = 0;
if (dictMatchStatePtr)
*dictMatchStatePtr = NULL;
/* On reaching window size, dictionaries are invalidated */
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
}
}
/* Similar to ZSTD_window_enforceMaxDist(),
* but only invalidates dictionary
* when input progresses beyond window size. */
MEM_STATIC void
ZSTD_checkDictValidity(ZSTD_window_t* window,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) {
/* On reaching window size, dictionaries are invalidated */
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
}
}
......
......@@ -13,7 +13,8 @@
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
......@@ -41,6 +42,7 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
} } } }
}
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
......@@ -58,7 +60,10 @@ size_t ZSTD_compressBlock_fast_generic(
const BYTE* ip0 = istart;
const BYTE* ip1;
const BYTE* anchor = istart;
const U32 prefixStartIndex = ms->window.dictLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validStartIndex = ms->window.dictLimit;
const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
......@@ -165,7 +170,7 @@ _match: /* Requires: ip0, match0, offcode */
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
......@@ -222,8 +227,15 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
const U32 dictHLog = dictCParams->hashLog;
/* otherwise, we would get index underflow when translating a dict index
* into a local index */
/* if a dictionary is still attached, it necessarily means that
* it is within window size. So we just check it. */
const U32 maxDistance = 1U << cParams->windowLog;
const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
assert(endIndex - prefixStartIndex <= maxDistance);
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
/* ensure there will be no no underflow
* when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
/* init */
......@@ -251,7 +263,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else if ( (matchIndex <= prefixStartIndex) ) {
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
U32 const dictMatchIndex = dictHashTable[dictHash];
......@@ -271,7 +283,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
} else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */
......@@ -286,7 +298,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
/* match found */
......@@ -327,7 +339,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_fast_dictMatchState(
......@@ -366,15 +378,24 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 dictStartIndex = ms->window.lowLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validLow = ms->window.lowLimit;
const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
const U32 dictStartIndex = lowLimit;
const BYTE* const dictStart = dictBase + dictStartIndex;
const U32 prefixStartIndex = ms->window.dictLimit;
const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1];
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t h = ZSTD_hashPtr(ip, hlog, mls);
......@@ -394,7 +415,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else {
if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {
......@@ -410,7 +431,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
offset = current - matchIndex;
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }
/* found a match : store it */
......@@ -445,7 +466,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
......
......@@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
U32* largerPtr = smallerPtr + 1;
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit;
U32 const windowValid = ms->window.lowLimit;
U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
current, dictLimit, windowLow);
......@@ -239,7 +242,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const windowLow = ms->window.lowLimit;
U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowValid = ms->window.lowLimit;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
......@@ -490,8 +495,10 @@ size_t ZSTD_HcFindBestMatch_generic (
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const U32 lowLimit = ms->window.lowLimit;
const U32 current = (U32)(ip-base);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowValid = ms->window.lowLimit;
const U32 lowLimit = (current - lowValid > maxDistance) ? current - maxDistance : lowValid;
const U32 minChain = current > chainSize ? current - chainSize : 0;
U32 nbAttempts = 1U << cParams->searchLog;
size_t ml=4-1;
......@@ -653,7 +660,6 @@ size_t ZSTD_compressBlock_lazy_generic(
/* init */
ip += (dictAndPrefixLength == 0);
ms->nextToUpdate3 = ms->nextToUpdate;
if (dictMode == ZSTD_noDict) {
U32 const maxRep = (U32)(ip - prefixLowest);
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
......@@ -933,7 +939,6 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
U32 offset_1 = rep[0], offset_2 = rep[1];
/* init */
ms->nextToUpdate3 = ms->nextToUpdate;
ip += (ip == prefixStart);
/* Match Loop */
......
......@@ -447,7 +447,7 @@ size_t ZSTD_ldm_generateSequences(
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
U32 const ldmHSize = 1U << params->hashLog;
U32 const correction = ZSTD_window_correctOverflow(
&ldmState->window, /* cycleLog */ 0, maxDist, src);
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
}
/* 2. We enforce the maximum offset allowed.
......
......@@ -1129,9 +1129,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
assert(flushed <= produced);
assert(jobPtr->consumed <= jobPtr->src.size);
toFlush = produced - flushed;
if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
/* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
/* if toFlush==0, nothing is available to flush.
* However, jobID is expected to still be active:
* if jobID was already completed and fully flushed,
* ZSTDMT_flushProduced() should have already moved onto next job.
* Therefore, some input has not yet been consumed. */
if (toFlush==0) {
assert(jobPtr->consumed < jobPtr->src.size);
}
}
......@@ -1148,12 +1153,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
{
if (params.ldmParams.enableLdm)
unsigned jobLog;
if (params.ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize
* based on chainLog instead. */
return MAX(21, params.cParams.chainLog + 4);
return MAX(20, params.cParams.windowLog + 2);
jobLog = MAX(21, params.cParams.chainLog + 4);
} else {
jobLog = MAX(20, params.cParams.windowLog + 2);
}
return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
}
static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
......@@ -1197,7 +1206,7 @@ static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
- overlapRLog;
}
assert(0 <= ovLog && ovLog <= 30);
assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
return (ovLog==0) ? 0 : (size_t)1 << ovLog;
......@@ -1391,7 +1400,7 @@ size_t ZSTDMT_initCStream_internal(
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
if (mtctx->singleBlockingThread) {
......@@ -1432,6 +1441,8 @@ size_t ZSTDMT_initCStream_internal(
if (mtctx->targetSectionSize == 0) {
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
}
assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
if (params.rsyncable) {
/* Aim for the targetsectionSize as the average job size. */
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
......
......@@ -50,6 +50,7 @@
#ifndef ZSTDMT_JOBSIZE_MIN
# define ZSTDMT_JOBSIZE_MIN (1 MB)
#endif
#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
......
......@@ -360,8 +360,11 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
frameParameter_unsupported);
return skippableHeaderSize + sizeU32;
{
size_t const skippableSize = skippableHeaderSize + sizeU32;
RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong);
return skippableSize;
}
}
/** ZSTD_findDecompressedSize() :
......@@ -378,11 +381,10 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize))
return skippableSize;
if (srcSize < skippableSize) {
if (ZSTD_isError(skippableSize)) {
return ZSTD_CONTENTSIZE_ERROR;
}
assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize;
......@@ -467,6 +469,8 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
frameSizeInfo.compressedSize <= srcSize);
return frameSizeInfo;
} else {
const BYTE* ip = (const BYTE*)src;
......@@ -529,7 +533,6 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
return frameSizeInfo.compressedSize;
}
/** ZSTD_decompressBound() :
* compatible with legacy mode
* `src` must point to the start of a ZSTD frame or a skippeable frame
......@@ -546,6 +549,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
return ZSTD_CONTENTSIZE_ERROR;
assert(srcSize >= compressedSize);
src = (const BYTE*)src + compressedSize;
srcSize -= compressedSize;
bound += decompressedBound;
......@@ -738,9 +742,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
(unsigned)magicNumber, ZSTD_MAGICNUMBER);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize))
return skippableSize;
RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong);
FORWARD_IF_ERROR(skippableSize);
assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize;
......
......@@ -505,7 +505,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
*nbSeqPtr = nbSeq;
/* FSE table descriptors */
RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
......@@ -637,9 +637,10 @@ size_t ZSTD_execSequence(BYTE* op,
if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
/* copy Literals */
ZSTD_copy8(op, *litPtr);
if (sequence.litLength > 8)
ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr);
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */
......@@ -686,13 +687,13 @@ size_t ZSTD_execSequence(BYTE* op,
if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op);
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
}
return sequenceLength;
}
......@@ -717,9 +718,11 @@ size_t ZSTD_execSequenceLong(BYTE* op,
if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
/* copy Literals */
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
if (sequence.litLength > 8)
ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */
......@@ -766,13 +769,13 @@ size_t ZSTD_execSequenceLong(BYTE* op,
if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op);
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
}
return sequenceLength;
}
......@@ -889,6 +892,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
}
FORCE_INLINE_TEMPLATE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
......@@ -918,6 +922,11 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
ZSTD_STATIC_ASSERT(
BIT_DStream_unfinished < BIT_DStream_completed &&
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
BIT_DStream_completed < BIT_DStream_overflow);
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
nbSeq--;
{ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
......@@ -930,6 +939,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
/* check if reached exact end */
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
RETURN_ERROR_IF(nbSeq, corruption_detected);
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
}
......@@ -1131,6 +1141,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static TARGET_ATTRIBUTE("bmi2") size_t
DONT_VECTORIZE
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
......
......@@ -71,7 +71,7 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 4
#define ZSTD_VERSION_RELEASE 0
#define ZSTD_VERSION_RELEASE 1
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
......@@ -82,16 +82,16 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library v
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */
/***************************************
* Default constant
***************************************/
/* *************************************
* Default constant
***************************************/
#ifndef ZSTD_CLEVEL_DEFAULT
# define ZSTD_CLEVEL_DEFAULT 3
#endif
/***************************************
* Constants
***************************************/
/* *************************************
* Constants
***************************************/
/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */
#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */
......@@ -183,9 +183,14 @@ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compres
***************************************/
/*= Compression context
* When compressing many times,
* it is recommended to allocate a context just once, and re-use it for each successive compression operation.
* it is recommended to allocate a context just once,
* and re-use it for each successive compression operation.
* This will make workload friendlier for system's memory.
* Use one context per thread for parallel execution in multi-threaded environments. */
* Note : re-using context is just a speed / resource optimization.
* It doesn't change the compression ratio, which remains identical.
* Note 2 : In multi-threaded environments,
* use one different context per thread for parallel execution.
*/
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
......@@ -380,6 +385,7 @@ typedef enum {
* ZSTD_c_forceMaxWindow
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
......@@ -389,6 +395,7 @@ typedef enum {
ZSTD_c_experimentalParam3=1000,
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003,
} ZSTD_cParameter;
typedef struct {
......@@ -657,17 +664,33 @@ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);
/* These buffer sizes are softly recommended.
* They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
* Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
* reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
*
* However, note that these recommendations are from the perspective of a C caller program.
* If the streaming interface is invoked from some other language,
* especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
* a major performance rule is to reduce crossing such interface to an absolute minimum.
* It's not rare that performance ends being spent more into the interface, rather than compression itself.
* In which cases, prefer using large buffers, as large as practical,
* for both input and output, to reduce the nb of roundtrips.
*/
ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
/*******************************************************************************
* This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and
* ZSTD_compressStream2(). It is redundant, but is still fully supported.
/* *****************************************************************************
* This following is a legacy streaming API.
* It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
* It is redundant, but remains fully supported.
* Advanced parameters and dictionary compression can only be used through the
* new API.
******************************************************************************/
/**
/*!
* Equivalent to:
*
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
......@@ -675,16 +698,16 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output
* ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
*/
ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
/**
/*!
* Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
* NOTE: The return value is different. ZSTD_compressStream() returns a hint for
* the next read size (if non-zero and not an error). ZSTD_compressStream2()
* returns the number of bytes left to flush (if non-zero and not an error).
* returns the minimum nb of bytes left to flush (if non-zero and not an error).
*/
ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
......@@ -969,7 +992,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#endif /* ZSTD_H_235446 */
/****************************************************************************************
/* **************************************************************************************
* ADVANCED AND EXPERIMENTAL FUNCTIONS
****************************************************************************************
* The definitions in the following section are considered experimental.
......@@ -1037,6 +1060,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#define ZSTD_LDM_HASHRATELOG_MIN 0
#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
/* Advanced parameter bounds */
#define ZSTD_TARGETCBLOCKSIZE_MIN 64
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
/* internal */
#define ZSTD_HASHLOG3_MAX 17
......@@ -1162,7 +1189,7 @@ typedef enum {
* however it does mean that all frame data must be present and valid. */
ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
/** ZSTD_decompressBound() :
/*! ZSTD_decompressBound() :
* `src` should point to the start of a series of ZSTD encoded and/or skippable frames
* `srcSize` must be the _exact_ size of this series
* (i.e. there should be a frame boundary at `src + srcSize`)
......@@ -1409,6 +1436,11 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
*/
#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
/* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size (default:0) */
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
......@@ -1843,7 +1875,7 @@ typedef struct {
unsigned checksumFlag;
} ZSTD_frameHeader;
/** ZSTD_getFrameHeader() :
/*! ZSTD_getFrameHeader() :
* decode Frame Header, or requires larger `srcSize`.
* @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment