diff options
Diffstat (limited to 'lib/zstd/common/zstd_internal.h')
| -rw-r--r-- | lib/zstd/common/zstd_internal.h | 175 | 
1 files changed, 84 insertions, 91 deletions
| diff --git a/lib/zstd/common/zstd_internal.h b/lib/zstd/common/zstd_internal.h index fc6f3a9b40c0..93305d9b41bb 100644 --- a/lib/zstd/common/zstd_internal.h +++ b/lib/zstd/common/zstd_internal.h @@ -20,6 +20,7 @@  *  Dependencies  ***************************************/  #include "compiler.h" +#include "cpu.h"  #include "mem.h"  #include "debug.h"                 /* assert, DEBUGLOG, RAWLOG, g_debuglevel */  #include "error_private.h" @@ -47,81 +48,7 @@  #undef MAX  #define MIN(a,b) ((a)<(b) ? (a) : (b))  #define MAX(a,b) ((a)>(b) ? (a) : (b)) - -/* - * Ignore: this is an internal helper. - * - * This is a helper function to help force C99-correctness during compilation. - * Under strict compilation modes, variadic macro arguments can't be empty. - * However, variadic function arguments can be. Using a function therefore lets - * us statically check that at least one (string) argument was passed, - * independent of the compilation flags. - */ -static INLINE_KEYWORD UNUSED_ATTR -void _force_has_format_string(const char *format, ...) { -  (void)format; -} - -/* - * Ignore: this is an internal helper. - * - * We want to force this function invocation to be syntactically correct, but - * we don't want to force runtime evaluation of its arguments. - */ -#define _FORCE_HAS_FORMAT_STRING(...) \ -  if (0) { \ -    _force_has_format_string(__VA_ARGS__); \ -  } - -/* - * Return the specified error if the condition evaluates to true. - * - * In debug modes, prints additional information. - * In order to do that (particularly, printing the conditional that failed), - * this can't just wrap RETURN_ERROR(). - */ -#define RETURN_ERROR_IF(cond, err, ...) \ -  if (cond) { \ -    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ -           __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ -    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -    RAWLOG(3, ": " __VA_ARGS__); \ -    RAWLOG(3, "\n"); \ -    return ERROR(err); \ -  } - -/* - * Unconditionally return the specified error. - * - * In debug modes, prints additional information. - */ -#define RETURN_ERROR(err, ...) \ -  do { \ -    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ -           __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ -    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -    RAWLOG(3, ": " __VA_ARGS__); \ -    RAWLOG(3, "\n"); \ -    return ERROR(err); \ -  } while(0); - -/* - * If the provided expression evaluates to an error code, returns that error code. - * - * In debug modes, prints additional information. - */ -#define FORWARD_IF_ERROR(err, ...) \ -  do { \ -    size_t const err_code = (err); \ -    if (ERR_isError(err_code)) { \ -      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ -             __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ -      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ -      RAWLOG(3, ": " __VA_ARGS__); \ -      RAWLOG(3, "\n"); \ -      return err_code; \ -    } \ -  } while(0); +#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))  /*-************************************* @@ -130,7 +57,6 @@ void _force_has_format_string(const char *format, ...) {  #define ZSTD_OPT_NUM    (1<<12)  #define ZSTD_REP_NUM      3                 /* number of repcodes */ -#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)  static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };  #define KB *(1 <<10) @@ -182,7 +108,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy  /* Each table cannot take more than #symbols * FSELog bits */  #define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8) -static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = { +static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {       0, 0, 0, 0, 0, 0, 0, 0,       0, 0, 0, 0, 0, 0, 0, 0,       1, 1, 1, 1, 2, 2, 3, 3, @@ -199,7 +125,7 @@ static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {  #define LL_DEFAULTNORMLOG 6  /* for static allocation */  static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; -static UNUSED_ATTR const U32 ML_bits[MaxML+1] = { +static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {       0, 0, 0, 0, 0, 0, 0, 0,       0, 0, 0, 0, 0, 0, 0, 0,       0, 0, 0, 0, 0, 0, 0, 0, @@ -234,12 +160,31 @@ static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;  *  Shared functions to include for inlining  *********************************************/  static void ZSTD_copy8(void* dst, const void* src) { +#if defined(ZSTD_ARCH_ARM_NEON) +    vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src)); +#else      ZSTD_memcpy(dst, src, 8); +#endif  } -  #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } + +/* Need to use memmove here since the literal buffer can now be located within +   the dst buffer. In circumstances where the op "catches up" to where the +   literal buffer is, there can be partial overlaps in this call on the final +   copy if the literal is being shifted by less than 16 bytes. */  static void ZSTD_copy16(void* dst, const void* src) { -    ZSTD_memcpy(dst, src, 16); +#if defined(ZSTD_ARCH_ARM_NEON) +    vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src)); +#elif defined(ZSTD_ARCH_X86_SSE2) +    _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src)); +#elif defined(__clang__) +    ZSTD_memmove(dst, src, 16); +#else +    /* ZSTD_memmove is not inlined properly by gcc */ +    BYTE copy16_buf[16]; +    ZSTD_memcpy(copy16_buf, src, 16); +    ZSTD_memcpy(dst, copy16_buf, 16); +#endif  }  #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } @@ -267,8 +212,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e      BYTE* op = (BYTE*)dst;      BYTE* const oend = op + length; -    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); -      if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {          /* Handle short offset copies. */          do { @@ -331,11 +274,18 @@ typedef enum {  *  Private declarations  *********************************************/  typedef struct seqDef_s { -    U32 offset;         /* Offset code of the sequence */ +    U32 offBase;   /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */      U16 litLength; -    U16 matchLength; +    U16 mlBase;    /* mlBase == matchLength - MINMATCH */  } seqDef; +/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */ +typedef enum { +    ZSTD_llt_none = 0,             /* no longLengthType */ +    ZSTD_llt_literalLength = 1,    /* represents a long literal */ +    ZSTD_llt_matchLength = 2       /* represents a long match */ +} ZSTD_longLengthType_e; +  typedef struct {      seqDef* sequencesStart;      seqDef* sequences;      /* ptr to end of sequences */ @@ -347,12 +297,12 @@ typedef struct {      size_t maxNbSeq;      size_t maxNbLit; -    /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength +    /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength       * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment       * the existing value of the litLength or matchLength by 0x10000.       */ -    U32   longLengthID;   /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */ -    U32   longLengthPos;  /* Index of the sequence to apply long length modification to */ +    ZSTD_longLengthType_e   longLengthType; +    U32                     longLengthPos;  /* Index of the sequence to apply long length modification to */  } seqStore_t;  typedef struct { @@ -362,18 +312,18 @@ typedef struct {  /*   * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences - * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. + * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.   */  MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)  {      ZSTD_sequenceLength seqLen;      seqLen.litLength = seq->litLength; -    seqLen.matchLength = seq->matchLength + MINMATCH; +    seqLen.matchLength = seq->mlBase + MINMATCH;      if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { -        if (seqStore->longLengthID == 1) { +        if (seqStore->longLengthType == ZSTD_llt_literalLength) {              seqLen.litLength += 0xFFFF;          } -        if (seqStore->longLengthID == 2) { +        if (seqStore->longLengthType == ZSTD_llt_matchLength) {              seqLen.matchLength += 0xFFFF;          }      } @@ -419,6 +369,41 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus      }  } +/* + * Counts the number of trailing zeros of a `size_t`. + * Most compilers should support CTZ as a builtin. A backup + * implementation is provided if the builtin isn't supported, but + * it may not be terribly efficient. + */ +MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val) +{ +    if (MEM_64bits()) { +#       if (__GNUC__ >= 4) +            return __builtin_ctzll((U64)val); +#       else +            static const int DeBruijnBytePos[64] = {  0,  1,  2,  7,  3, 13,  8, 19, +                                                      4, 25, 14, 28,  9, 34, 20, 56, +                                                      5, 17, 26, 54, 15, 41, 29, 43, +                                                      10, 31, 38, 35, 21, 45, 49, 57, +                                                      63,  6, 12, 18, 24, 27, 33, 55, +                                                      16, 53, 40, 42, 30, 37, 44, 48, +                                                      62, 11, 23, 32, 52, 39, 36, 47, +                                                      61, 22, 51, 46, 60, 50, 59, 58 }; +            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +#       endif +    } else { /* 32 bits */ +#       if (__GNUC__ >= 3) +            return __builtin_ctz((U32)val); +#       else +            static const int DeBruijnBytePos[32] = {  0,  1, 28,  2, 29, 14, 24,  3, +                                                     30, 22, 20, 15, 25, 17,  4,  8, +                                                     31, 27, 13, 23, 21, 19, 16,  7, +                                                     26, 12, 18,  6, 11,  5, 10,  9 }; +            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +#       endif +    } +} +  /* ZSTD_invalidateRepCodes() :   * ensures next compression will not use repcodes from previous block. @@ -445,6 +430,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,  size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,                         const void* src, size_t srcSize); +/* + * @returns true iff the CPU supports dynamic BMI2 dispatch. + */ +MEM_STATIC int ZSTD_cpuSupportsBmi2(void) +{ +    ZSTD_cpuid_t cpuid = ZSTD_cpuid(); +    return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid); +}  #endif   /* ZSTD_CCOMMON_H_MODULE */ |