1// Copyright 2011 The Snappy-Go Authors. All rights reserved. 2// Copyright (c) 2019 Klaus Post. All rights reserved. 3// Use of this source code is governed by a BSD-style 4// license that can be found in the LICENSE file. 5 6// Package s2 implements the S2 compression format. 7// 8// S2 is an extension of Snappy. Similar to Snappy S2 is aimed for high throughput, 9// which is why it features concurrent compression for bigger payloads. 10// 11// Decoding is compatible with Snappy compressed content, 12// but content compressed with S2 cannot be decompressed by Snappy. 13// 14// For more information on Snappy/S2 differences see README in: https://github.com/klauspost/compress/tree/master/s2 15// 16// There are actually two S2 formats: block and stream. They are related, 17// but different: trying to decompress block-compressed data as a S2 stream 18// will fail, and vice versa. The block format is the Decode and Encode 19// functions and the stream format is the Reader and Writer types. 20// 21// A "better" compression option is available. This will trade some compression 22// speed 23// 24// The block format, the more common case, is used when the complete size (the 25// number of bytes) of the original data is known upfront, at the time 26// compression starts. The stream format, also known as the framing format, is 27// for when that isn't always true. 28// 29// Blocks to not offer much data protection, so it is up to you to 30// add data validation of decompressed blocks. 31// 32// Streams perform CRC validation of the decompressed data. 33// Stream compression will also be performed on multiple CPU cores concurrently 34// significantly improving throughput. 35package s2 36 37import ( 38 "bytes" 39 "hash/crc32" 40) 41 42/* 43Each encoded block begins with the varint-encoded length of the decoded data, 44followed by a sequence of chunks. Chunks begin and end on byte boundaries. The 45first byte of each chunk is broken into its 2 least and 6 most significant bits 46called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. 47Zero means a literal tag. All other values mean a copy tag. 48 49For literal tags: 50 - If m < 60, the next 1 + m bytes are literal bytes. 51 - Otherwise, let n be the little-endian unsigned integer denoted by the next 52 m - 59 bytes. The next 1 + n bytes after that are literal bytes. 53 54For copy tags, length bytes are copied from offset bytes ago, in the style of 55Lempel-Ziv compression algorithms. In particular: 56 - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). 57 The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 58 of the offset. The next byte is bits 0-7 of the offset. 59 - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). 60 The length is 1 + m. The offset is the little-endian unsigned integer 61 denoted by the next 2 bytes. 62 - For l == 3, the offset ranges in [0, 1<<32) and the length in 63 [1, 65). The length is 1 + m. The offset is the little-endian unsigned 64 integer denoted by the next 4 bytes. 65*/ 66const ( 67 tagLiteral = 0x00 68 tagCopy1 = 0x01 69 tagCopy2 = 0x02 70 tagCopy4 = 0x03 71) 72 73const ( 74 checksumSize = 4 75 chunkHeaderSize = 4 76 magicChunk = "\xff\x06\x00\x00" + magicBody 77 magicChunkSnappy = "\xff\x06\x00\x00" + magicBodySnappy 78 magicBodySnappy = "sNaPpY" 79 magicBody = "S2sTwO" 80 81 // maxBlockSize is the maximum size of the input to encodeBlock. 82 // 83 // For the framing format (Writer type instead of Encode function), 84 // this is the maximum uncompressed size of a block. 85 maxBlockSize = 4 << 20 86 87 // minBlockSize is the minimum size of block setting when creating a writer. 88 minBlockSize = 4 << 10 89 90 // Default block size 91 defaultBlockSize = 1 << 20 92 93 // maxSnappyBlockSize is the maximum snappy block size. 94 maxSnappyBlockSize = 1 << 16 95 96 obufHeaderLen = checksumSize + chunkHeaderSize 97) 98 99const ( 100 chunkTypeCompressedData = 0x00 101 chunkTypeUncompressedData = 0x01 102 chunkTypePadding = 0xfe 103 chunkTypeStreamIdentifier = 0xff 104) 105 106var crcTable = crc32.MakeTable(crc32.Castagnoli) 107 108// crc implements the checksum specified in section 3 of 109// https://github.com/google/snappy/blob/master/framing_format.txt 110func crc(b []byte) uint32 { 111 c := crc32.Update(0, crcTable, b) 112 return c>>15 | c<<17 + 0xa282ead8 113} 114 115// literalExtraSize returns the extra size of encoding n literals. 116// n should be >= 0 and <= math.MaxUint32. 117func literalExtraSize(n int64) int64 { 118 if n == 0 { 119 return 0 120 } 121 switch { 122 case n < 60: 123 return 1 124 case n < 1<<8: 125 return 2 126 case n < 1<<16: 127 return 3 128 case n < 1<<24: 129 return 4 130 default: 131 return 5 132 } 133} 134 135type byter interface { 136 Bytes() []byte 137} 138 139var _ byter = &bytes.Buffer{} 140