// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	betterLongTableBits = 19                       // Bits used in the long match table
	betterLongTableSize = 1 << betterLongTableBits // Size of the table
	betterLongLen       = 8                        // Bytes used for table hash

	// Note: Increasing the short table bits or making the hash shorter
	// can actually lead to compression degradation since it will 'steal' more from the
	// long match table and match offsets are quite big.
	// This greatly depends on the type of input.
	betterShortTableBits = 13                        // Bits used in the short match table
	betterShortTableSize = 1 << betterShortTableBits // Size of the table
	betterShortLen       = 5                         // Bytes used for table hash

	betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
	betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard

	betterShortTableShardCnt  = 1 << (betterShortTableBits - dictShardBits)     // Number of shards in the table
	betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
)

type prevEntry struct {
	offset int32
	prev   int32
}

// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type betterFastEncoder struct {
	fastBase
	table     [betterShortTableSize]tableEntry
	longTable [betterLongTableSize]prevEntry
}

type betterFastEncoderDict struct {
	betterFastEncoder
	dictTable            []tableEntry
	dictLongTable        []prevEntry
	shortTableShardDirty [betterShortTableShardCnt]bool
	longTableShardDirty  [betterLongTableShardCnt]bool
	allDirty             bool
}

// Encode improves compression...
func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
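		// Entries still within maxMatchOff of the new window are rebased so they
		// remain valid after e.cur is reset to maxMatchOff; older entries are cleared.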
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					// Index skipped...
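					// Every second position covered by the repeat match is hashed into
					// both tables (long hash at the position, short hash one byte later)
					// so later searches can find offsets inside it.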
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
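				// The 8-byte probe above already compared equal, so extension starts at offset 8.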
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t > e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// Try to find a better match by searching for a long match at the end of the current best match
		if s+matched < sLimit {
			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched
				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
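		// offset1 becomes the distance of the match just found; the previous
		// offset1 is kept in offset2 for repeat-offset coding.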
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)

			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	e.ensureHist(len(src))
	e.Encode(blk, src)
}

// Encode improves compression...
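// It mirrors betterFastEncoder.Encode, but marks every table shard it writes
// as dirty so Reset can restore just those shards from the dictionary tables.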
func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			e.allDirty = true
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.allDirty = true
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
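				// As in betterFastEncoder.Encode: the 8-byte probe already matched,
				// so extension starts at offset 8.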
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t > e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				e.markLongShardDirty(nextHashL)
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
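			// The step grows with the distance since the last emit (scaled by
			// kSearchStrength), so poorly matching input is scanned faster.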
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// Try to find a better match by searching for a long match at the end of the current best match
		if s+matched < sLimit {
			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched
				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.markLongShardDirty(h0)
			h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
			e.markShortShardDirty(h1)
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)

			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match.
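			// load3232 above already verified 4 bytes, so only the remainder is measured.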
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset and set a dictionary if not nil
func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("betterFastEncoder: Reset with dict")
	}
}

// Reset will reset and set a dictionary if not nil
func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}
	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		end := int32(len(d.content)) - 8 + e.maxMatchOff
		for i := e.maxMatchOff; i < end; i += 4 {
			const hashLog = betterShortTableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hashLen(cv, hashLog, betterShortLen)      // 0 -> 4
			nextHash1 := hashLen(cv>>8, hashLog, betterShortLen)  // 1 -> 5
			nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6
			nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
			e.dictTable[nextHash3] = tableEntry{
				val:    uint32(cv >> 24),
				offset: i + 3,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Init or copy dict long table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]prevEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			h := hashLen(cv, betterLongTableBits, betterLongLen)
			e.dictLongTable[h] = prevEntry{
				offset: e.maxMatchOff,
				prev:   e.dictLongTable[h].offset,
			}

			end := int32(len(d.content)) - 8 + e.maxMatchOff
			off := 8 // First to read
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[off]) << 56)
				h := hashLen(cv, betterLongTableBits, betterLongLen)
				e.dictLongTable[h] = prevEntry{
					offset: i,
					prev:   e.dictLongTable[h].offset,
				}
				off++
			}
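			// At this point every dictionary position except the final 8 bytes
			// (which cannot seed a full hash) has been chained into dictLongTable.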
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Reset table to initial state
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.shortTableShardDirty {
				if e.shortTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterShortTableShardCnt
		const shardSize = betterShortTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.table[:], e.dictTable)
			for i := range e.shortTableShardDirty {
				e.shortTableShardDirty[i] = false
			}
		} else {
			for i := range e.shortTableShardDirty {
				if !e.shortTableShardDirty[i] {
					continue
				}

				copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
				e.shortTableShardDirty[i] = false
			}
		}
	}
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.longTableShardDirty {
				if e.longTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterLongTableShardCnt
		const shardSize = betterLongTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.longTable[:], e.dictLongTable)
			for i := range e.longTableShardDirty {
				e.longTableShardDirty[i] = false
			}
		} else {
			for i := range e.longTableShardDirty {
				if !e.longTableShardDirty[i] {
					continue
				}

				copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
				e.longTableShardDirty[i] = false
			}
		}
	}
	e.cur = e.maxMatchOff
	e.allDirty = false
}

// markLongShardDirty records that the long table shard containing entryNum
// has been modified and must be restored from dictLongTable on the next Reset.
func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
}

// markShortShardDirty records that the short table shard containing entryNum
// has been modified and must be restored from dictTable on the next Reset.
func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
	e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
}
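// The helper below is an illustrative sketch only; it is not used by the encoder
// and its name is made up for illustration. It shows the effective chain of
// length 2 that the long table keeps per hash slot, assuming the caller passes
// absolute offsets the same way Encode does (position + e.cur).
func exampleLongChainUpdate(tbl *[betterLongTableSize]prevEntry, h uint32, off int32) {
	// The newest offset takes the slot; the offset it displaces is retained in
	// prev, so a lookup can compare the two most recent positions with this hash.
	tbl[h] = prevEntry{offset: off, prev: tbl[h].offset}
}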