1// Copyright 2011 The Snappy-Go Authors. All rights reserved.
2// Copyright (c) 2019 Klaus Post. All rights reserved.
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file.
5
6// Package s2 implements the S2 compression format.
7//
8// S2 is an extension of Snappy. Similar to Snappy S2 is aimed for high throughput,
9// which is why it features concurrent compression for bigger payloads.
10//
11// Decoding is compatible with Snappy compressed content,
12// but content compressed with S2 cannot be decompressed by Snappy.
13//
14// For more information on Snappy/S2 differences see README in: https://github.com/klauspost/compress/tree/master/s2
15//
16// There are actually two S2 formats: block and stream. They are related,
17// but different: trying to decompress block-compressed data as a S2 stream
18// will fail, and vice versa. The block format is the Decode and Encode
19// functions and the stream format is the Reader and Writer types.
20//
21// A "better" compression option is available. This will trade some compression
22// speed
23//
24// The block format, the more common case, is used when the complete size (the
25// number of bytes) of the original data is known upfront, at the time
26// compression starts. The stream format, also known as the framing format, is
27// for when that isn't always true.
28//
29// Blocks to not offer much data protection, so it is up to you to
30// add data validation of decompressed blocks.
31//
32// Streams perform CRC validation of the decompressed data.
33// Stream compression will also be performed on multiple CPU cores concurrently
34// significantly improving throughput.
35package s2
36
37import (
38	"bytes"
39	"hash/crc32"
40)
41
42/*
43Each encoded block begins with the varint-encoded length of the decoded data,
44followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
45first byte of each chunk is broken into its 2 least and 6 most significant bits
46called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
47Zero means a literal tag. All other values mean a copy tag.
48
49For literal tags:
50  - If m < 60, the next 1 + m bytes are literal bytes.
51  - Otherwise, let n be the little-endian unsigned integer denoted by the next
52    m - 59 bytes. The next 1 + n bytes after that are literal bytes.
53
54For copy tags, length bytes are copied from offset bytes ago, in the style of
55Lempel-Ziv compression algorithms. In particular:
56  - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
57    The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
58    of the offset. The next byte is bits 0-7 of the offset.
59  - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
60    The length is 1 + m. The offset is the little-endian unsigned integer
61    denoted by the next 2 bytes.
62  - For l == 3, the offset ranges in [0, 1<<32) and the length in
63    [1, 65). The length is 1 + m. The offset is the little-endian unsigned
64    integer denoted by the next 4 bytes.
65*/
66const (
67	tagLiteral = 0x00
68	tagCopy1   = 0x01
69	tagCopy2   = 0x02
70	tagCopy4   = 0x03
71)
72
73const (
74	checksumSize     = 4
75	chunkHeaderSize  = 4
76	magicChunk       = "\xff\x06\x00\x00" + magicBody
77	magicChunkSnappy = "\xff\x06\x00\x00" + magicBodySnappy
78	magicBodySnappy  = "sNaPpY"
79	magicBody        = "S2sTwO"
80
81	// maxBlockSize is the maximum size of the input to encodeBlock.
82	//
83	// For the framing format (Writer type instead of Encode function),
84	// this is the maximum uncompressed size of a block.
85	maxBlockSize = 4 << 20
86
87	// minBlockSize is the minimum size of block setting when creating a writer.
88	minBlockSize = 4 << 10
89
90	// Default block size
91	defaultBlockSize = 1 << 20
92
93	// maxSnappyBlockSize is the maximum snappy block size.
94	maxSnappyBlockSize = 1 << 16
95
96	obufHeaderLen = checksumSize + chunkHeaderSize
97)
98
99const (
100	chunkTypeCompressedData   = 0x00
101	chunkTypeUncompressedData = 0x01
102	chunkTypePadding          = 0xfe
103	chunkTypeStreamIdentifier = 0xff
104)
105
106var crcTable = crc32.MakeTable(crc32.Castagnoli)
107
108// crc implements the checksum specified in section 3 of
109// https://github.com/google/snappy/blob/master/framing_format.txt
110func crc(b []byte) uint32 {
111	c := crc32.Update(0, crcTable, b)
112	return c>>15 | c<<17 + 0xa282ead8
113}
114
115// literalExtraSize returns the extra size of encoding n literals.
116// n should be >= 0 and <= math.MaxUint32.
117func literalExtraSize(n int64) int64 {
118	if n == 0 {
119		return 0
120	}
121	switch {
122	case n < 60:
123		return 1
124	case n < 1<<8:
125		return 2
126	case n < 1<<16:
127		return 3
128	case n < 1<<24:
129		return 4
130	default:
131		return 5
132	}
133}
134
135type byter interface {
136	Bytes() []byte
137}
138
139var _ byter = &bytes.Buffer{}
140