// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	betterLongTableBits = 19                       // Bits used in the long match table
	betterLongTableSize = 1 << betterLongTableBits // Size of the table

	// Note: Increasing the short table bits or making the hash shorter
	// can actually lead to compression degradation since it will 'steal' more from the
	// long match table and match offsets are quite big.
	// This greatly depends on the type of input.
	betterShortTableBits = 13                        // Bits used in the short match table
	betterShortTableSize = 1 << betterShortTableBits // Size of the table

	betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
	betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard

	betterShortTableShardCnt  = 1 << (betterShortTableBits - dictShardBits)     // Number of shards in the table
	betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
)

type prevEntry struct {
	offset int32
	prev   int32
}

// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type betterFastEncoder struct {
	fastBase
	table     [betterShortTableSize]tableEntry
	longTable [betterLongTableSize]prevEntry
}

type betterFastEncoderDict struct {
	betterFastEncoder
	dictTable            []tableEntry
	dictLongTable        []prevEntry
	shortTableShardDirty [betterShortTableShardCnt]bool
	longTableShardDirty  [betterLongTableShardCnt]bool
	allDirty             bool
}

// Encode improves compression...
func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
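		// Entries at or above minOff (the oldest offset still inside the match
		// window) are rebased to v-e.cur+e.maxMatchOff; older entries are zeroed
		// so they cannot be mistaken for reachable history once e.cur is rewound
		// to e.maxMatchOff below.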
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					// Index skipped...
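					// Every second position between index0 and s-1 gets indexed:
					// the 8-byte value at index0 goes into the long table, and the
					// value one byte in (cv0>>8) goes into the short table at
					// index0+1, which keeps the tables warm at roughly half the
					// hashing cost of indexing every position.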
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t >e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, betterLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// Try to find a better match by searching for a long match at the end of the current best match
		if s+matched < sLimit {
			nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched
				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
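		// offset1 always holds the most recent match distance and offset2 the one
		// before it; both are written back to blk.recentOffsets at the end, so
		// repeat matches against either distance stay cheap to encode.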
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hash8(cv0, betterLongTableBits)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)

			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	e.ensureHist(len(src))
	e.Encode(blk, src)
}

// Encode improves compression...
func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
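	// Same table rebase as betterFastEncoder.Encode, but every wipe or shift
	// also sets allDirty so Reset knows the dictionary tables must be copied
	// back in full.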
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			e.allDirty = true
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.allDirty = true
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
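					// The backwards extension below is bounded by startLimit (so at
					// least one literal remains before the match) and by tMin (the
					// start of the match window), and it stops before the total
					// match length would reach maxMatchLength.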
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hash5(cv1, betterShortTableBits)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hash5(cv1, betterShortTableBits)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t >e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, betterLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				e.markLongShardDirty(nextHashL)
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
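			// The skip grows with the distance since the last emitted literal:
			// with kSearchStrength == 9, every 256 bytes without a match add one
			// extra byte to the step, so incompressible data is scanned faster.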
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// Try to find a better match by searching for a long match at the end of the current best match
		if s+matched < sLimit {
			nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched
				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hash8(cv0, betterLongTableBits)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.markLongShardDirty(h0)
			h1 := hash5(cv1, betterShortTableBits)
			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
			e.markShortShardDirty(h1)
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)

			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match
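			// The load3232 comparison above already verified 4 matching bytes,
			// so the match length starts at 4 and only needs extending forward.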
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("betterFastEncoder: Reset with dict")
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}
	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		end := int32(len(d.content)) - 8 + e.maxMatchOff
		for i := e.maxMatchOff; i < end; i += 4 {
			const hashLog = betterShortTableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hash5(cv, hashLog)      // 0 -> 4
			nextHash1 := hash5(cv>>8, hashLog)  // 1 -> 5
			nextHash2 := hash5(cv>>16, hashLog) // 2 -> 6
			nextHash3 := hash5(cv>>24, hashLog) // 3 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
			e.dictTable[nextHash3] = tableEntry{
				val:    uint32(cv >> 24),
				offset: i + 3,
			}
		}
		e.allDirty = true
	}

	// Init or copy dict long table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]prevEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			h := hash8(cv, betterLongTableBits)
			e.dictLongTable[h] = prevEntry{
				offset: e.maxMatchOff,
				prev:   e.dictLongTable[h].offset,
			}

			end := int32(len(d.content)) - 8 + e.maxMatchOff
			off := 8 // First to read
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[off]) << 56)
				h := hash8(cv, betterLongTableBits)
				e.dictLongTable[h] = prevEntry{
					offset: i,
					prev:   e.dictLongTable[h].offset,
				}
				off++
			}
		}
		e.allDirty = true
	}
	// Update lastDictID only after both tables have been checked against it,
	// so a dictionary change rebuilds the long table as well.
	e.lastDictID = d.id

	// Reset table to initial state
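	// If most shards are dirty (or allDirty is set) it is cheaper to copy the
	// whole dictionary table back in one go; otherwise only the dirty shards
	// are restored. The 4/6 threshold below is roughly two thirds of the
	// shard count.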
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.shortTableShardDirty {
				if e.shortTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterShortTableShardCnt
		const shardSize = betterShortTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.table[:], e.dictTable)
			for i := range e.shortTableShardDirty {
				e.shortTableShardDirty[i] = false
			}
		} else {
			for i := range e.shortTableShardDirty {
				if !e.shortTableShardDirty[i] {
					continue
				}

				copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
				e.shortTableShardDirty[i] = false
			}
		}
	}
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.longTableShardDirty {
				if e.longTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterLongTableShardCnt
		const shardSize = betterLongTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.longTable[:], e.dictLongTable)
			for i := range e.longTableShardDirty {
				e.longTableShardDirty[i] = false
			}
		} else {
			for i := range e.longTableShardDirty {
				if !e.longTableShardDirty[i] {
					continue
				}

				copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
				e.longTableShardDirty[i] = false
			}
		}
	}
	e.cur = e.maxMatchOff
	e.allDirty = false
}

// markLongShardDirty marks the long table shard containing entryNum as needing
// to be restored from the dictionary table on the next Reset.
func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
}

// markShortShardDirty marks the short table shard containing entryNum as needing
// to be restored from the dictionary table on the next Reset.
func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
	e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
}