1// Copyright 2015, Joe Tsai. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE.md file. 4 5// Package meta implements the XFLATE meta encoding scheme. 6// 7// The XFLATE meta encoding is a method of encoding arbitrary data into one 8// or more RFC 1951 compliant DEFLATE blocks. This encoding has the special 9// property that when the blocks are decoded by a RFC 1951 compliant 10// decompressor, they produce absolutely no output. However, when decoded with 11// the XFLATE meta decoder, it losslessly produces the original input. 12// 13// The meta encoding works by encoding arbitrary data into the Huffman tree 14// definition of dynamic DEFLATE blocks. These blocks have an empty data section 15// and produce no output. Due to the Huffman definition overhead, the encoded 16// output is usually larger than the input. However, for most input datasets, 17// this encoding scheme is able to achieve an efficiency of at least 50%. 18package meta 19 20import ( 21 "fmt" 22 23 "github.com/dsnet/compress/internal/errors" 24 "github.com/dsnet/compress/internal/prefix" 25) 26 27// These are the magic values that begin every single meta block. 28const ( 29 magicVals uint32 = 0x05860004 30 magicMask uint32 = 0xfffe3fc6 31) 32 33// ReverseSearch searches for a meta header in reverse. This returns the last 34// index where the header was found. If not found, it returns -1. 35func ReverseSearch(data []byte) int { 36 var magic uint32 37 for i := len(data) - 1; i >= 0; i-- { 38 magic = (magic << 8) | uint32(data[i]) 39 if magic&magicMask == magicVals { 40 return i 41 } 42 } 43 return -1 44} 45 46const ( 47 maxSyms = 257 // Maximum number of literal codes (with EOB marker) 48 minHuffLen = 1 // Minimum number of bits for each Huffman code 49 maxHuffLen = 7 // Maximum number of bits for each Huffman code 50 minRepLast = 3 // Minimum number of repeated codes (clen: 16) 51 maxRepLast = 6 // Maximum number of repeated codes (clen: 16) 52 minRepZero = 11 // Minimum number of repeated zeros (clen: 18) 53 maxRepZero = 138 // Maximum number of repeated zeros (clen: 18) 54) 55 56// These are some constants regarding the theoretical and practical limits for 57// the meta encoding of a single block. 58const ( 59 // MinRawBytes and MaxRawBytes are the theoretical minimum and maximum 60 // number of bytes a block can encode. 61 MinRawBytes = 0 62 MaxRawBytes = 31 63 64 // MinEncBytes and MaxEncBytes are the theoretical minimum and maximum 65 // number of bytes an encoded block will occupy. 66 MinEncBytes = 12 67 MaxEncBytes = 64 68 69 // EnsureRawBytes is the maximum number of bytes that a single block is 70 // ensured to encode (computed using brute force). 71 EnsureRawBytes = 22 72) 73 74// FinalMode controls or indicates which final bits are set in the last block 75// in the meta stream. In the meta encoding, there are 2 final bits: 76// 77// Stream: This is the final bit from DEFLATE (as defined in RFC 1951). 78// and indicates that the entire compression stream has come to an end. 79// This bit indicate absolute termination of the stream. 80// 81// Meta: This final bit indicates that the current sequence of meta blocks has 82// terminated. The decoded data from those blocks form a meta substream. 83// This bit is used as a form of message framing for the meta encoding format. 84// 85// It invalid for the stream final bit to be set, while the meta final bit is 86// not set. All other combinations are legal. 87type FinalMode int 88 89const ( 90 // FinalNil has neither the stream nor meta final bits set. 91 FinalNil FinalMode = iota 92 93 // FinalMeta has the final bit set, but not stream final bit. 94 FinalMeta 95 96 // FinalStream has both the meta and stream final bits set. 97 FinalStream 98) 99 100// The Huffman encoding used by the XFLATE meta encoding uses a partially 101// pre-determined HCLEN table. The symbols are 0, 16, 18, and another symbol 102// between minHuffLen and maxHuffLen, inclusively. The 0 symbol represents a 103// logical zero in the meta encoding, and the symbol between minHuffLen and 104// maxHuffLen represents a logical one. Symbols 16 and 18 are used to provide a 105// primitive form of run-length encoding. The codes that these symbols map to 106// are fixed and are shown below. 107// 108// Code Symbol 109// 0 <=> 0 (symZero) 110// 10 <=> minHuffLen..maxHuffLen (symOne) 111// 110 <=> 16 (symRepLast) 112// 111 <=> 18 (symRepZero) 113// 114// The symZero symbol occupies 1 bit since it is the most commonly needed bit, 115// while symOne occupies 2 bits. Thus, it is cheaper to encode logical zeros 116// than it is to encode logical ones. The invert bit in the meta encoding takes 117// advantage of this fact and allows all data bits to be inverted so that the 118// number zero bits is greater than the number of one bits. 119const ( 120 symZero = iota // Symbol to encode a input zero (clen: 0) 121 symOne // Symbol to encode a input one (clen: minHuffLen..maxHuffLen) 122 symRepLast // Symbol to repeat last symbol (clen: 16) 123 symRepZero // Symbol to repeat zero symbol (clen: 18) 124) 125 126var encHuff, decHuff = func() (enc prefix.Encoder, dec prefix.Decoder) { 127 codes := [4]prefix.PrefixCode{ 128 {Sym: symZero, Len: 1}, 129 {Sym: symOne, Len: 2}, 130 {Sym: symRepLast, Len: 3}, 131 {Sym: symRepZero, Len: 3}, 132 } 133 prefix.GeneratePrefixes(codes[:]) 134 enc.Init(codes[:]) 135 dec.Init(codes[:]) 136 return 137}() 138 139func errorf(c int, f string, a ...interface{}) error { 140 return errors.Error{Code: c, Pkg: "meta", Msg: fmt.Sprintf(f, a...)} 141} 142 143var errClosed = errorf(errors.Closed, "") 144 145// oneBitsLUT reports the number of set bits in the input byte. 146var oneBitsLUT = func() (lut [256]byte) { 147 for i := range lut[:] { 148 for b := byte(i); b > 0; b >>= 1 { 149 lut[i] += b & 1 150 } 151 } 152 return 153}() 154 155// numBits counts the number of zero and one bits in the byte. 156func numBits(b byte) (zeros, ones int) { 157 ones = int(oneBitsLUT[b]) 158 zeros = 8 - ones 159 return 160} 161 162// numPads computes number of bits needed to pad n-bits to a byte alignment. 163func numPads(n uint) uint { 164 return -n & 7 165} 166 167// btoi converts a bool to a integer 0 or 1. 168func btoi(b bool) int { 169 if b { 170 return 1 171 } 172 return 0 173} 174