// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import (
	"fmt"
	"math"
	"math/bits"
)

const (
	tableBits      = 15                               // Bits used in the table
	tableSize      = 1 << tableBits                   // Size of the table
	tableShardCnt  = 1 << (tableBits - dictShardBits) // Number of shards in the table
	tableShardSize = tableSize / tableShardCnt        // Size of an individual shard
	tableMask      = tableSize - 1                    // Mask for table indices. Redundant, but can eliminate bounds checks.
	maxMatchLength = 131074
)

type tableEntry struct {
	val    uint32
	offset int32
}

type fastEncoder struct {
	fastBase
	table [tableSize]tableEntry
}

type fastEncoderDict struct {
	fastEncoder
	dictTable       []tableEntry
	tableShardDirty [tableShardCnt]bool
	allDirty        bool
}

// Encode mimics functionality in zstd_fast.c
func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 7

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches, all recent offsets
		// are guaranteed to originate within the current block.
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hash6(cv, hashLog)
			nextHash2 := hash6(cv>>8, hashLog)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
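				// The block below is a manual inlining of the commented-out
				// e.matchlen call: it compares 8 bytes at a time using load64
				// and XOR. endI rounds the comparable length down to a whole
				// number of 8-byte words; on the first non-zero difference,
				// bits.TrailingZeros64(diff)>>3 turns the number of identical
				// trailing bits into whole matching bytes (loads are
				// little-endian).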
				var seq seq
				var length int32
				// length = 4 + e.matchlen(s+6, repIndex+4, src)
				{
					a := src[s+6:]
					b := src[repIndex+4:]
					endI := len(a) & (math.MaxInt32 - 7)
					length = int32(endI) + 4
					for i := 0; i < endI; i += 8 {
						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
							break
						}
					}
				}

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debug {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		//l := e.matchlen(s+4, t+4, src) + 4
		var l int32
		{
			a := src[s+4:]
			b := src[t+4:]
			endI := len(a) & (math.MaxInt32 - 7)
			l = int32(endI) + 4
			for i := 0; i < endI; i += 8 {
				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
					break
				}
			}
		}

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least a 4-byte match.
			// No need to check backwards. We come straight from a match.
			//l := 4 + e.matchlen(s+4, o2+4, src)
			var l int32
			{
				a := src[s+4:]
				b := src[o2+4:]
				endI := len(a) & (math.MaxInt32 - 7)
				l = int32(endI) + 4
				for i := 0; i < endI; i += 8 {
					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
						break
					}
				}
			}

			// Store this, since we have it.
			nextHash := hash6(cv, hashLog)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	if debug {
		if len(src) > maxBlockSize {
			panic("src too big")
		}
	}

	// Protect against e.cur wraparound.
	if e.cur >= bufferReset {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches, all recent offsets
		// are guaranteed to originate within the current block.

		for {
			nextHash := hash6(cv, hashLog)
			nextHash2 := hash6(cv>>8, hashLog)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				// length := 4 + e.matchlen(s+6, repIndex+4, src)
				// length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
				var length int32
				{
					a := src[s+6:]
					b := src[repIndex+4:]
					endI := len(a) & (math.MaxInt32 - 7)
					length = int32(endI) + 4
					for i := 0; i < endI; i += 8 {
						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
							break
						}
					}
				}

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debug {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && t < 0 {
			panic(fmt.Sprintf("t (%d) < 0 ", t))
		}
		// Extend the 4-byte match as long as possible.
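		// Note: unlike Encode, the extension loops here do not clamp
		// against maxMatchLength. With no history, a match is bounded by
		// the block size, which stays below the encodable maximum (see the
		// function comment above).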
		//l := e.matchlenNoHist(s+4, t+4, src) + 4
		// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
		var l int32
		{
			a := src[s+4:]
			b := src[t+4:]
			endI := len(a) & (math.MaxInt32 - 7)
			l = int32(endI) + 4
			for i := 0; i < endI; i += 8 {
				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
					break
				}
			}
		}

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
			// We have at least a 4-byte match.
			// No need to check backwards. We come straight from a match.
			//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
			// l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
			var l int32
			{
				a := src[s+4:]
				b := src[o2+4:]
				endI := len(a) & (math.MaxInt32 - 7)
				l = int32(endI) + 4
				for i := 0; i < endI; i += 8 {
					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
						break
					}
				}
			}

			// Store this, since we have it.
			nextHash := hash6(cv, hashLog)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
	// We do not store history, so we must offset e.cur to avoid false matches for next user.
	if e.cur < bufferReset {
		e.cur += int32(len(src))
	}
}

// Encode will encode the content, with a dictionary if initialized for it.
func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	if e.allDirty || len(src) > 32<<10 {
		e.fastEncoder.Encode(blk, src)
		e.allDirty = true
		return
	}
	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
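		// Table entries store positions rebased by e.cur. When e.cur nears
		// bufferReset, we rebase every entry so that e.cur can restart at
		// e.maxMatchOff; entries further back than e.maxMatchOff from the
		// end of history are cleared, as they can no longer be matched.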
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 7

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches, all recent offsets
		// are guaranteed to originate within the current block.
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hash6(cv, hashLog)
			nextHash2 := hash6(cv>>8, hashLog)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
			e.markShardDirty(nextHash2)

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				var length int32
				// length = 4 + e.matchlen(s+6, repIndex+4, src)
				{
					a := src[s+6:]
					b := src[repIndex+4:]
					endI := len(a) & (math.MaxInt32 - 7)
					length = int32(endI) + 4
					for i := 0; i < endI; i += 8 {
						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
							break
						}
					}
				}

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debug {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		//l := e.matchlen(s+4, t+4, src) + 4
		var l int32
		{
			a := src[s+4:]
			b := src[t+4:]
			endI := len(a) & (math.MaxInt32 - 7)
			l = int32(endI) + 4
			for i := 0; i < endI; i += 8 {
				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
					break
				}
			}
		}

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least a 4-byte match.
			// No need to check backwards. We come straight from a match.
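			// A match at the previous offset2 is emitted with zero literals.
			// In zstd, when litLen is 0, repeat code 1 refers to the second
			// most recent offset, hence seq.offset = 1 below and the
			// offset1/offset2 swap once the sequence is stored.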
			//l := 4 + e.matchlen(s+4, o2+4, src)
			var l int32
			{
				a := src[s+4:]
				b := src[o2+4:]
				endI := len(a) & (math.MaxInt32 - 7)
				l = int32(endI) + 4
				for i := 0; i < endI; i += 8 {
					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
						break
					}
				}
			}

			// Store this, since we have it.
			nextHash := hash6(cv, hashLog)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset the encoder. The fastEncoder does not support
// dictionaries, so d must be nil.
func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("fastEncoder: Reset with dict")
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}

	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		end := e.maxMatchOff + int32(len(d.content)) - 8
		for i := e.maxMatchOff; i < end; i += 3 {
			const hashLog = tableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hash6(cv, hashLog)      // 0 -> 5
			nextHash1 := hash6(cv>>8, hashLog)  // 1 -> 6
			nextHash2 := hash6(cv>>16, hashLog) // 2 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	e.cur = e.maxMatchOff
	dirtyShardCnt := 0
	if !e.allDirty {
		for i := range e.tableShardDirty {
			if e.tableShardDirty[i] {
				dirtyShardCnt++
			}
		}
	}

	const shardCnt = tableShardCnt
	const shardSize = tableShardSize
	if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
		copy(e.table[:], e.dictTable)
		for i := range e.tableShardDirty {
			e.tableShardDirty[i] = false
		}
		e.allDirty = false
		return
	}
	for i := range e.tableShardDirty {
		if !e.tableShardDirty[i] {
			continue
		}

		copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
		e.tableShardDirty[i] = false
	}
	e.allDirty = false
}

func (e *fastEncoderDict) markAllShardsDirty() {
	e.allDirty = true
}

func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
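	// The table is split into tableShardCnt shards of tableShardSize
	// entries. Recording which shards were written allows Reset to copy
	// back only the dirty shards from dictTable instead of the full table.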
	e.tableShardDirty[entryNum/tableShardSize] = true
}