diff options
author | Nick Terrell <terrelln@fb.com> | 2022-10-17 13:32:37 -0700 |
---|---|---|
committer | Nick Terrell <terrelln@fb.com> | 2022-10-24 12:12:32 -0700 |
commit | 2aa14b1ab2c41a4fe41efae80d58bb77da91f19f (patch) | |
tree | 17f83bdf97a2a93f8d0aa4d5daf6a92caa7bde79 /lib/zstd/compress/zstd_compress_sequences.c | |
parent | 4782c725c1538aa9ef894ae4a3938db40be7f02c (diff) |
zstd: import usptream v1.5.2
Updates the kernel's zstd library to v1.5.2, the latest zstd release.
The upstream tag it is updated to is `v1.5.2-kernel`, which contains
several cherry-picked commits on top of the v1.5.2 release which are
required for the kernel update. I will create this tag once the PR is
ready to merge, until then reference the temporary upstream branch
`v1.5.2-kernel-cherrypicks`.
I plan to submit this patch as part of the v6.2 merge window.
I've done basic build testing & testing on x86-64, i386, and aarch64.
I'm merging these patches into my `zstd-next` branch, which is pulled
into `linux-next` for further testing.
I've benchmarked BtrFS with zstd compression on a x86-64 machine, and
saw these results. Decompression speed is a small win across the board.
The lower compression levels 1-4 see both compression speed and
compression ratio wins. The higher compression levels see a small
compression speed loss and about neutral ratio. I expect the lower
compression levels to be used much more heavily than the high
compression levels, so this should be a net win.
Level CTime DTime Ratio
1 -2.95% -1.1% -0.7%
3 -3.5% -1.2% -0.5%
5 +3.7% -1.0% +0.0%
7 +3.2% -0.9% +0.0%
9 -4.3% -0.8% +0.1%
Signed-off-by: Nick Terrell <terrelln@fb.com>
Diffstat (limited to 'lib/zstd/compress/zstd_compress_sequences.c')
-rw-r--r-- | lib/zstd/compress/zstd_compress_sequences.c | 31 |
1 files changed, 17 insertions, 14 deletions
diff --git a/lib/zstd/compress/zstd_compress_sequences.c b/lib/zstd/compress/zstd_compress_sequences.c index dcfcdc9cc5e8..21ddc1b37acf 100644 --- a/lib/zstd/compress/zstd_compress_sequences.c +++ b/lib/zstd/compress/zstd_compress_sequences.c @@ -85,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t { unsigned cost = 0; unsigned s; + + assert(total > 0); for (s = 0; s <= max; ++s) { unsigned norm = (unsigned)((256 * count[s]) / total); if (count[s] != 0 && norm == 0) @@ -273,10 +275,11 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, assert(nbSeq_1 > 1); assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp)); (void)entropyWorkspaceSize; - FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), ""); - { size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog); /* overflow protected */ + FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed"); + assert(oend >= op); + { size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */ FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), ""); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed"); return NCountSize; } } @@ -310,19 +313,19 @@ ZSTD_encodeSequences_body( FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); if (MEM_32bits()) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]); if (MEM_32bits()) BIT_flushBits(&blockStream); if (longOffsets) { U32 const ofBits = ofCodeTable[nbSeq-1]; unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); if (extraBits) { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); + BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits); BIT_flushBits(&blockStream); } - BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, + BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits, ofBits - extraBits); } else { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]); } BIT_flushBits(&blockStream); @@ -336,8 +339,8 @@ ZSTD_encodeSequences_body( U32 const mlBits = ML_bits[mlCode]; DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u", (unsigned)sequences[n].litLength, - (unsigned)sequences[n].matchLength + MINMATCH, - (unsigned)sequences[n].offset); + (unsigned)sequences[n].mlBase + MINMATCH, + (unsigned)sequences[n].offBase); /* 32b*/ /* 64b*/ /* (7)*/ /* (7)*/ FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ @@ -348,18 +351,18 @@ ZSTD_encodeSequences_body( BIT_flushBits(&blockStream); /* (7)*/ BIT_addBits(&blockStream, sequences[n].litLength, llBits); if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + BIT_addBits(&blockStream, sequences[n].mlBase, mlBits); if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); if (longOffsets) { unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); if (extraBits) { - BIT_addBits(&blockStream, sequences[n].offset, extraBits); + BIT_addBits(&blockStream, sequences[n].offBase, extraBits); BIT_flushBits(&blockStream); /* (7)*/ } - BIT_addBits(&blockStream, sequences[n].offset >> extraBits, + BIT_addBits(&blockStream, sequences[n].offBase >> extraBits, ofBits - extraBits); /* 31 */ } else { - BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */ } BIT_flushBits(&blockStream); /* (7)*/ DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); @@ -396,7 +399,7 @@ ZSTD_encodeSequences_default( #if DYNAMIC_BMI2 -static TARGET_ATTRIBUTE("bmi2") size_t +static BMI2_TARGET_ATTRIBUTE size_t ZSTD_encodeSequences_bmi2( void* dst, size_t dstCapacity, FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, |