1// Copyright 2011 The Snappy-Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Package snappy implements the snappy block-based compression format. 6// It aims for very high speeds and reasonable compression. 7// 8// The C++ snappy implementation is at https://github.com/google/snappy 9package snappy // import "github.com/golang/snappy" 10 11import ( 12 "hash/crc32" 13) 14 15/* 16Each encoded block begins with the varint-encoded length of the decoded data, 17followed by a sequence of chunks. Chunks begin and end on byte boundaries. The 18first byte of each chunk is broken into its 2 least and 6 most significant bits 19called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. 20Zero means a literal tag. All other values mean a copy tag. 21 22For literal tags: 23 - If m < 60, the next 1 + m bytes are literal bytes. 24 - Otherwise, let n be the little-endian unsigned integer denoted by the next 25 m - 59 bytes. The next 1 + n bytes after that are literal bytes. 26 27For copy tags, length bytes are copied from offset bytes ago, in the style of 28Lempel-Ziv compression algorithms. In particular: 29 - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). 30 The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 31 of the offset. The next byte is bits 0-7 of the offset. 32 - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). 33 The length is 1 + m. The offset is the little-endian unsigned integer 34 denoted by the next 2 bytes. 35 - For l == 3, this tag is a legacy format that is no longer issued by most 36 encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in 37 [1, 65). The length is 1 + m. The offset is the little-endian unsigned 38 integer denoted by the next 4 bytes. 
/* Chunk tags: the value held in the 2 least significant bits of a chunk's first byte. */
const (
	tagLiteral = 0x00
	tagCopy1   = 0x01
	tagCopy2   = 0x02
	tagCopy4   = 0x03
)

// Framing-format sizes and magic strings, and the output buffer dimensions
// derived from them.
const (
	checksumSize    = 4
	chunkHeaderSize = 4
	magicChunk      = "\xff\x06\x00\x00" + magicBody
	magicBody       = "sNaPpY"

	// maxBlockSize bounds the size of the input handed to encodeBlock. The
	// wire format itself does not impose it, but parts of the encoder assume
	// that an offset fits into a uint16.
	//
	// The framing format (the Writer type, as opposed to the Encode function)
	// has the same limit:
	// https://github.com/google/snappy/blob/master/framing_format.txt says
	// that "the uncompressed data in a chunk must be no longer than 65536
	// bytes".
	maxBlockSize = 65536

	// maxEncodedLenOfMaxBlockSize is MaxEncodedLen(maxBlockSize) written out
	// as a literal: keeping it a const (rather than a variable) lets obufLen
	// be a const too. TestMaxEncodedLenOfMaxBlockSize confirms that the two
	// values agree.
	maxEncodedLenOfMaxBlockSize = 76490

	obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize
	obufLen       = obufHeaderLen + maxEncodedLenOfMaxBlockSize
)

// Chunk type identifiers.
const (
	chunkTypeCompressedData   = 0x00
	chunkTypeUncompressedData = 0x01
	chunkTypePadding          = 0xfe
	chunkTypeStreamIdentifier = 0xff
)

// crcTable is the CRC-32 table for the Castagnoli polynomial, built once and
// used by crc.
var crcTable = crc32.MakeTable(crc32.Castagnoli)

// crc returns the checksum of b as specified in section 3 of
// https://github.com/google/snappy/blob/master/framing_format.txt
//
// The Castagnoli CRC-32 of b is rotated right by 15 bits and then offset by
// the constant 0xa282ead8 (with uint32 wrap-around).
func crc(b []byte) uint32 {
	sum := crc32.Checksum(b, crcTable)
	rotated := sum>>15 | sum<<17
	return rotated + 0xa282ead8
}