Diffstat (limited to 'lib/zstd/compress/zstd_double_fast.c')
-rw-r--r--   lib/zstd/compress/zstd_double_fast.c   413
1 file changed, 295 insertions, 118 deletions
diff --git a/lib/zstd/compress/zstd_double_fast.c b/lib/zstd/compress/zstd_double_fast.c
index b0424d23ac57..76933dea2624 100644
--- a/lib/zstd/compress/zstd_double_fast.c
+++ b/lib/zstd/compress/zstd_double_fast.c
@@ -48,10 +48,216 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
 
 FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_doubleFast_generic(
+size_t ZSTD_compressBlock_doubleFast_noDict_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize, U32 const mls /* template */)
+{
+    ZSTD_compressionParameters const* cParams = &ms->cParams;
+    U32* const hashLong = ms->hashTable;
+    const U32 hBitsL = cParams->hashLog;
+    U32* const hashSmall = ms->chainTable;
+    const U32 hBitsS = cParams->chainLog;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* anchor = istart;
+    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
+    /* presumes that, if there is a dictionary, it must be using Attach mode */
+    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
+    const BYTE* const prefixLowest = base + prefixLowestIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    size_t mLength;
+    U32 offset;
+    U32 curr;
+
+    /* how many positions to search before increasing step size */
+    const size_t kStepIncr = 1 << kSearchStrength;
+    /* the position at which to increment the step size if no match is found */
+    const BYTE* nextStep;
+    size_t step; /* the current step size */
+
+    size_t hl0; /* the long hash at ip */
+    size_t hl1; /* the long hash at ip1 */
+
+    U32 idxl0; /* the long match index for ip */
+    U32 idxl1; /* the long match index for ip1 */
+
+    const BYTE* matchl0; /* the long match for ip */
+    const BYTE* matchs0; /* the short match for ip */
+    const BYTE* matchl1; /* the long match for ip1 */
+
+    const BYTE* ip = istart; /* the current position */
+    const BYTE* ip1; /* the next position */
+
+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
+
+    /* init */
+    ip += ((ip - prefixLowest) == 0);
+    {
+        U32 const current = (U32)(ip - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
+        U32 const maxRep = current - windowLow;
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+    }
+
+    /* Outer Loop: one iteration per match found and stored */
+    while (1) {
+        step = 1;
+        nextStep = ip + kStepIncr;
+        ip1 = ip + step;
+
+        if (ip1 > ilimit) {
+            goto _cleanup;
+        }
+
+        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
+        idxl0 = hashLong[hl0];
+        matchl0 = base + idxl0;
+
+        /* Inner Loop: one iteration per search / position */
+        do {
+            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
+            const U32 idxs0 = hashSmall[hs0];
+            curr = (U32)(ip-base);
+            matchs0 = base + idxs0;
+
+            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */
+
+            /* check noDict repcode */
+            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
+                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
+                ip++;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
+                goto _match_stored;
+            }
+
+            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
+
+            if (idxl0 > prefixLowestIndex) {
+                /* check prefix long match */
+                if (MEM_read64(matchl0) == MEM_read64(ip)) {
+                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
+                    offset = (U32)(ip-matchl0);
+                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
+                    goto _match_found;
+                }
+            }
+
+            idxl1 = hashLong[hl1];
+            matchl1 = base + idxl1;
+
+            if (idxs0 > prefixLowestIndex) {
+                /* check prefix short match */
+                if (MEM_read32(matchs0) == MEM_read32(ip)) {
+                    goto _search_next_long;
+                }
+            }
+
+            if (ip1 >= nextStep) {
+                PREFETCH_L1(ip1 + 64);
+                PREFETCH_L1(ip1 + 128);
+                step++;
+                nextStep += kStepIncr;
+            }
+            ip = ip1;
+            ip1 += step;
+
+            hl0 = hl1;
+            idxl0 = idxl1;
+            matchl0 = matchl1;
+    #if defined(__aarch64__)
+            PREFETCH_L1(ip+256);
+    #endif
+        } while (ip1 <= ilimit);
+
+_cleanup:
+        /* save reps for next block */
+        rep[0] = offset_1 ? offset_1 : offsetSaved;
+        rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+        /* Return the last literals size */
+        return (size_t)(iend - anchor);
+
+_search_next_long:
+
+        /* check prefix long +1 match */
+        if (idxl1 > prefixLowestIndex) {
+            if (MEM_read64(matchl1) == MEM_read64(ip1)) {
+                ip = ip1;
+                mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
+                offset = (U32)(ip-matchl1);
+                while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
+                goto _match_found;
+            }
+        }
+
+        /* if no long +1 match, explore the short match we found */
+        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
+        offset = (U32)(ip - matchs0);
+        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
+
+        /* fall-through */
+
+_match_found: /* requires ip, offset, mLength */
+        offset_2 = offset_1;
+        offset_1 = offset;
+
+        if (step < 4) {
+            /* It is unsafe to write this value back to the hashtable when ip1 is
+             * greater than or equal to the new ip we will have after we're done
+             * processing this match. Rather than perform that test directly
+             * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
+             * more predictable test. The minmatch even if we take a short match is
+             * 4 bytes, so as long as step, the distance between ip and ip1
+             * (initially) is less than 4, we know ip1 < new ip. */
+            hashLong[hl1] = (U32)(ip1 - base);
+        }
+
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
+
+_match_stored:
+        /* match found */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = curr+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }
+
+            /* check immediate repcode */
+            while ( (ip <= ilimit)
+                 && ( (offset_2>0)
+                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
+                /* store sequence */
+                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
+                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
+                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength);
+                ip += rLength;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+            }
+        }
+    }
+}
+
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
-        U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
+        U32 const mls /* template */)
 {
     ZSTD_compressionParameters const* cParams = &ms->cParams;
     U32* const hashLong = ms->hashTable;
@@ -72,54 +278,30 @@ size_t ZSTD_compressBlock_doubleFast_generic(
     U32 offsetSaved = 0;
 
     const ZSTD_matchState_t* const dms = ms->dictMatchState;
-    const ZSTD_compressionParameters* const dictCParams =
-                                     dictMode == ZSTD_dictMatchState ?
-                                     &dms->cParams : NULL;
-    const U32* const dictHashLong  = dictMode == ZSTD_dictMatchState ?
-                                     dms->hashTable : NULL;
-    const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
-                                     dms->chainTable : NULL;
-    const U32 dictStartIndex       = dictMode == ZSTD_dictMatchState ?
-                                     dms->window.dictLimit : 0;
-    const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
-                                     dms->window.base : NULL;
-    const BYTE* const dictStart    = dictMode == ZSTD_dictMatchState ?
-                                     dictBase + dictStartIndex : NULL;
-    const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
-                                     dms->window.nextSrc : NULL;
-    const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
-                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
-                                     0;
-    const U32 dictHBitsL           = dictMode == ZSTD_dictMatchState ?
-                                     dictCParams->hashLog : hBitsL;
-    const U32 dictHBitsS           = dictMode == ZSTD_dictMatchState ?
-                                     dictCParams->chainLog : hBitsS;
+    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
+    const U32* const dictHashLong  = dms->hashTable;
+    const U32* const dictHashSmall = dms->chainTable;
+    const U32 dictStartIndex       = dms->window.dictLimit;
+    const BYTE* const dictBase     = dms->window.base;
+    const BYTE* const dictStart    = dictBase + dictStartIndex;
+    const BYTE* const dictEnd      = dms->window.nextSrc;
+    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
+    const U32 dictHBitsL           = dictCParams->hashLog;
+    const U32 dictHBitsS           = dictCParams->chainLog;
     const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
 
-    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
-
-    assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
 
     /* if a dictionary is attached, it must be within window range */
-    if (dictMode == ZSTD_dictMatchState) {
-        assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
-    }
+    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
 
     /* init */
     ip += (dictAndPrefixLength == 0);
-    if (dictMode == ZSTD_noDict) {
-        U32 const curr = (U32)(ip - base);
-        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
-        U32 const maxRep = curr - windowLow;
-        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
-        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
-    }
-    if (dictMode == ZSTD_dictMatchState) {
-        /* dictMatchState repCode checks don't currently handle repCode == 0
-         * disabling. */
-        assert(offset_1 <= dictAndPrefixLength);
-        assert(offset_2 <= dictAndPrefixLength);
-    }
+
+    /* dictMatchState repCode checks don't currently handle repCode == 0
+     * disabling. */
+    assert(offset_1 <= dictAndPrefixLength);
+    assert(offset_2 <= dictAndPrefixLength);
 
     /* Main Search Loop */
     while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
@@ -135,29 +317,18 @@ size_t ZSTD_compressBlock_doubleFast_generic(
         const BYTE* matchLong = base + matchIndexL;
         const BYTE* match = base + matchIndexS;
         const U32 repIndex = curr + 1 - offset_1;
-        const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
-                            && repIndex < prefixLowestIndex) ?
+        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
                                dictBase + (repIndex - dictIndexDelta) :
                                base + repIndex;
         hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */
 
-        /* check dictMatchState repcode */
-        if (dictMode == ZSTD_dictMatchState
-            && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+        /* check repcode */
+        if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
             && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
-            goto _match_stored;
-        }
-
-        /* check noDict repcode */
-        if ( dictMode == ZSTD_noDict
-          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
-            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
-            ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
             goto _match_stored;
         }
 
@@ -169,7 +340,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
                 while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                 goto _match_found;
             }
-        } else if (dictMode == ZSTD_dictMatchState) {
+        } else {
             /* check dictMatchState long match */
             U32 const dictMatchIndexL = dictHashLong[dictHL];
             const BYTE* dictMatchL = dictBase + dictMatchIndexL;
@@ -187,7 +358,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
             if (MEM_read32(match) == MEM_read32(ip)) {
                 goto _search_next_long;
             }
-        } else if (dictMode == ZSTD_dictMatchState) {
+        } else {
             /* check dictMatchState short match */
             U32 const dictMatchIndexS = dictHashSmall[dictHS];
             match = dictBase + dictMatchIndexS;
@@ -220,7 +391,7 @@ _search_next_long:
                     while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                     goto _match_found;
                 }
-            } else if (dictMode == ZSTD_dictMatchState) {
+            } else {
                 /* check dict long +1 match */
                 U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
                 const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
@@ -234,7 +405,7 @@ _search_next_long:
         }   }   }
 
         /* if no long +1 match, explore the short match we found */
-        if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
+        if (matchIndexS < prefixLowestIndex) {
             mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
             offset = (U32)(curr - matchIndexS);
             while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
@@ -248,7 +419,7 @@ _match_found:
         offset_2 = offset_1;
         offset_1 = offset;
 
-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
 
 _match_stored:
         /* match found */
@@ -266,43 +437,27 @@ _match_stored:
             }
 
             /* check immediate repcode */
-            if (dictMode == ZSTD_dictMatchState) {
-                while (ip <= ilimit) {
-                    U32 const current2 = (U32)(ip-base);
-                    U32 const repIndex2 = current2 - offset_2;
-                    const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
-                        && repIndex2 < prefixLowestIndex ?
-                            dictBase + repIndex2 - dictIndexDelta :
-                            base + repIndex2;
-                    if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
-                       && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-                        const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
-                        size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
-                        U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                        ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
-                        hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
-                        hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
-                        ip += repLength2;
-                        anchor = ip;
-                        continue;
-                    }
-                    break;
-            }   }
-
-            if (dictMode == ZSTD_noDict) {
-                while ( (ip <= ilimit)
-                     && ( (offset_2>0)
-                        & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
-                    /* store sequence */
-                    size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
-                    U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
-                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
-                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
-                    ip += rLength;
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
+                        dictBase + repIndex2 - dictIndexDelta :
+                        base + repIndex2;
+                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
+                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
+                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
+                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
+                    ip += repLength2;
                     anchor = ip;
-                    continue;   /* faster when present ... (?) */
-        }   }   }
+                    continue;
+                }
+                break;
+            }
+        }
     }   /* while (ip < ilimit) */
 
     /* save reps for next block */
@@ -313,6 +468,24 @@ _match_stored:
     return (size_t)(iend - anchor);
 }
 
+#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
+    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
+            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
+            void const* src, size_t srcSize)                                                             \
+    {                                                                                                    \
+        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
+    }
+
+ZSTD_GEN_DFAST_FN(noDict, 4)
+ZSTD_GEN_DFAST_FN(noDict, 5)
+ZSTD_GEN_DFAST_FN(noDict, 6)
+ZSTD_GEN_DFAST_FN(noDict, 7)
+
+ZSTD_GEN_DFAST_FN(dictMatchState, 4)
+ZSTD_GEN_DFAST_FN(dictMatchState, 5)
+ZSTD_GEN_DFAST_FN(dictMatchState, 6)
+ZSTD_GEN_DFAST_FN(dictMatchState, 7)
+
 
 size_t ZSTD_compressBlock_doubleFast(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -323,13 +496,13 @@ size_t ZSTD_compressBlock_doubleFast(
     {
     default: /* includes case 3 */
     case 4 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
+        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
     case 5 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
+        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
     case 6 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
+        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
     case 7 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
+        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
     }
 }
 
@@ -343,13 +516,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
     {
     default: /* includes case 3 */
     case 4 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
+        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
     case 5 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
+        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
     case 6 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
+        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
     case 7 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
+        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
     }
 }
 
@@ -385,7 +558,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
 
     /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
     if (prefixStartIndex == dictStartIndex)
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
+        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
 
     /* Search Loop */
     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
@@ -407,12 +580,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
         hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
 
         if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
-            & (repIndex > dictStartIndex))
+            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
         } else {
             if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -423,7 +596,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
 
             } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -448,7 +621,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 }
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
 
             } else {
                 ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -475,12 +648,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 U32 const repIndex2 = current2 - offset_2;
                 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
-                    & (repIndex2 > dictStartIndex))
+                    & (offset_2 <= current2 - dictStartIndex))
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                     ip += repLength2;
@@ -498,6 +671,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
     return (size_t)(iend - anchor);
 }
 
+ZSTD_GEN_DFAST_FN(extDict, 4)
+ZSTD_GEN_DFAST_FN(extDict, 5)
+ZSTD_GEN_DFAST_FN(extDict, 6)
+ZSTD_GEN_DFAST_FN(extDict, 7)
 
 size_t ZSTD_compressBlock_doubleFast_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -508,12 +685,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
     {
     default: /* includes case 3 */
     case 4 :
-        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
+        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
     case 5 :
-        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
+        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
     case 6 :
-        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
+        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
     case 7 :
-        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
+        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
     }
 }
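
A note on the ZSTD_GEN_DFAST_FN macro added by this patch: it uses token pasting to stamp out one thin static wrapper per (dictMode, mls) pair, so ZSTD_GEN_DFAST_FN(noDict, 4) defines ZSTD_compressBlock_doubleFast_noDict_4(), which simply calls ZSTD_compressBlock_doubleFast_noDict_generic() with mls fixed at the compile-time constant 4; every mls-dependent computation in the FORCE_INLINE_TEMPLATE body can then be specialized per wrapper. Below is a minimal standalone sketch of the same generation pattern, using hypothetical names and a toy "search" function rather than the zstd internals:

/* Toy illustration of the ZSTD_GEN_DFAST_FN pattern: a generic routine that
 * takes a run-time parameter, plus macro-generated wrappers that pin that
 * parameter to a constant so the compiler can specialize each copy. */
#include <stddef.h>
#include <stdio.h>

static size_t search_generic(const char* src, size_t srcSize, unsigned mls)
{
    /* stand-in for the real match finder: count runs that reach length mls */
    size_t hits = 0, run = 1, i;
    for (i = 1; i < srcSize; i++) {
        run = (src[i] == src[i-1]) ? run + 1 : 1;
        if (run == mls) hits++;
    }
    return hits;
}

#define GEN_SEARCH_FN(mls)                                          \
    static size_t search_##mls(const char* src, size_t srcSize)     \
    {                                                               \
        return search_generic(src, srcSize, mls);                   \
    }

GEN_SEARCH_FN(4)   /* defines search_4() */
GEN_SEARCH_FN(5)   /* defines search_5() */

int main(void)
{
    const char data[] = "aaaabbbbbcc";
    printf("%zu %zu\n", search_4(data, sizeof(data) - 1), search_5(data, sizeof(data) - 1));
    return 0;
}

This mirrors why the dispatchers above now switch to the generated _4.._7 wrappers instead of passing mls (and, previously, dictMode) into a single generic function at run time.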
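The rewritten noDict search loop also changes how positions are probed: the stride between probes starts at 1, is bumped by one every kStepIncr (1 << kSearchStrength) positions that pass without a match, and resets to 1 once a match is stored, so stretches of poorly compressible data are skipped with progressively larger steps. A minimal sketch of that acceleration idea in isolation, with hypothetical names rather than the kernel code:

/* Sketch of an accelerating scan: probe every `step` bytes, and enlarge the
 * step each time another K_STEP_INCR positions go by with no hit, so barren
 * regions cost progressively less per byte. (Illustrative only.) */
#include <stddef.h>

#define K_SEARCH_STRENGTH 8
#define K_STEP_INCR ((size_t)1 << K_SEARCH_STRENGTH)

static size_t accelerated_scan(const unsigned char* buf, size_t len,
                               int (*is_match)(const unsigned char*))
{
    size_t ip = 0;                   /* current probe position */
    size_t step = 1;                 /* current stride */
    size_t nextStep = K_STEP_INCR;   /* position at which to enlarge the stride */

    while (ip < len) {
        if (is_match(buf + ip))
            return ip;               /* caller stores the match and restarts with step = 1 */
        if (ip >= nextStep) {
            step++;
            nextStep += K_STEP_INCR;
        }
        ip += step;
    }
    return len;                      /* no match found */
}

In the patch itself the same idea is interleaved with the long/short hash probes and the PREFETCH_L1(ip1 + 64/128) hints, and the reset to step = 1 happens at the top of the outer while (1) loop, once per stored match.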