update dependencies (#6267)

Signed-off-by: hongming <coder.scala@gmail.com>
This commit is contained in:
hongming
2024-11-06 10:27:06 +08:00
committed by GitHub
parent faf255a084
commit cfebd96a1f
4263 changed files with 341374 additions and 132036 deletions

View File

@@ -43,7 +43,7 @@ func (m *match) estBits(bitsPerByte int32) {
if m.rep < 0 {
ofc = ofCode(uint32(m.s-m.offset) + 3)
} else {
ofc = ofCode(uint32(m.rep))
ofc = ofCode(uint32(m.rep) & 3)
}
// Cost, excluding
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
@@ -135,8 +135,20 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
break
}
// Add block to history
s := e.addBlock(src)
blk.size = len(src)
// Check RLE first
if len(src) > zstdMinMatch {
ml := matchLen(src[1:], src)
if ml == len(src)-1 {
blk.literals = append(blk.literals, src[0])
blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3})
return
}
}
if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)]
@@ -197,17 +209,10 @@ encodeLoop:
// Set m to a match at offset if it looks like that will improve compression.
improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
delta := s - offset
if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first {
return
}
if debugAsserts {
if offset <= 0 {
panic(offset)
}
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
// Try to quick reject if we already have a long match.
if m.length > 16 {
left := len(src) - int(m.s+m.length)
@@ -226,8 +231,10 @@ encodeLoop:
}
}
l := 4 + e.matchlen(s+4, offset+4, src)
if rep < 0 {
if m.rep <= 0 {
// Extend candidate match backwards as far as possible.
// Do not extend repeats as we can assume they are optimal
// and offsets change if s == nextEmit.
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
@@ -238,7 +245,14 @@ encodeLoop:
l++
}
}
if debugAsserts {
if offset >= s {
panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
}
if !bytes.Equal(src[s:s+l], src[offset:offset+l]) {
panic(fmt.Sprintf("second match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
cand := match{offset: offset, s: s, length: l, rep: rep}
cand.estBits(bitsPerByte)
if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
@@ -281,6 +295,7 @@ encodeLoop:
// Load next and check...
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
index0 := s + 1
// Look far ahead, unless we have a really long match already...
if best.length < goodEnough {
@@ -334,41 +349,45 @@ encodeLoop:
}
if debugAsserts {
if best.offset >= best.s {
panic(fmt.Sprintf("best.offset > s: %d >= %d", best.offset, best.s))
}
if best.s < nextEmit {
panic(fmt.Sprintf("s %d < nextEmit %d", best.s, nextEmit))
}
if best.offset < s-e.maxMatchOff {
panic(fmt.Sprintf("best.offset < s-e.maxMatchOff: %d < %d", best.offset, s-e.maxMatchOff))
}
if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
}
}
// We have a match, we can store the forward value
s = best.s
if best.rep > 0 {
var seq seq
seq.matchLen = uint32(best.length - zstdMinMatch)
if debugAsserts && s <= nextEmit {
panic("s <= nextEmit")
}
addLiterals(&seq, best.s)
// Repeat. If bit 4 is set, this is a non-lit repeat.
seq.offset = uint32(best.rep & 3)
if debugSequences {
println("repeat sequence", seq, "next s:", s)
println("repeat sequence", seq, "next s:", best.s, "off:", best.s-best.offset)
}
blk.sequences = append(blk.sequences, seq)
// Index old s + 1 -> s - 1
index0 := s + 1
s = best.s + best.length
nextEmit = s
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
// Index skipped...
end := s
if s > sLimit+4 {
end = sLimit + 4
}
off := index0 + e.cur
for index0 < s {
for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@@ -377,6 +396,7 @@ encodeLoop:
off++
index0++
}
switch best.rep {
case 2, 4 | 1:
offset1, offset2 = offset2, offset1
@@ -385,13 +405,17 @@ encodeLoop:
case 4 | 3:
offset1, offset2, offset3 = offset1-1, offset1, offset2
}
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
continue
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
index0 := s + 1
s = best.s
t := best.offset
offset1, offset2, offset3 = s-t, offset1, offset2
@@ -418,19 +442,25 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
nextEmit = s
if s >= sLimit {
break encodeLoop
// Index old s + 1 -> s - 1 or sLimit
end := s
if s > sLimit-4 {
end = sLimit - 4
}
// Index old s + 1 -> s - 1
for index0 < s {
off := index0 + e.cur
for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
index0++
off++
}
if s >= sLimit {
break encodeLoop
}
}