1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package flate
6
7import (
8	"io"
9)
10
11const (
12	// The largest offset code.
13	offsetCodeCount = 30
14
15	// The special code used to mark the end of a block.
16	endBlockMarker = 256
17
18	// The first length code.
19	lengthCodesStart = 257
20
21	// The number of codegen codes.
22	codegenCodeCount = 19
23	badCode          = 255
24
25	// bufferFlushSize indicates the buffer size
26	// after which bytes are flushed to the writer.
27	// Should preferably be a multiple of 6, since
28	// we accumulate 6 bytes between writes to the buffer.
29	bufferFlushSize = 240
30
31	// bufferSize is the actual output byte buffer size.
32	// It must have additional headroom for a flush
33	// which can contain up to 8 bytes.
34	bufferSize = bufferFlushSize + 8
35)
36
37// The number of extra bits needed by length code X - LENGTH_CODES_START.
38var lengthExtraBits = []int8{
39	/* 257 */ 0, 0, 0,
40	/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
41	/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
42	/* 280 */ 4, 5, 5, 5, 5, 0,
43}
44
45// The length indicated by length code X - LENGTH_CODES_START.
46var lengthBase = []uint32{
47	0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
48	12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
49	64, 80, 96, 112, 128, 160, 192, 224, 255,
50}
51
52// offset code word extra bits.
53var offsetExtraBits = []int8{
54	0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
55	4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
56	9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
57}
58
59var offsetBase = []uint32{
60	0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
61	0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
62	0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
63	0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
64	0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
65	0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
66}
67
68// The odd order in which the codegen code sizes are written.
69var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
70
71type huffmanBitWriter struct {
72	// writer is the underlying writer.
73	// Do not use it directly; use the write method, which ensures
74	// that Write errors are sticky.
75	writer io.Writer
76
77	// Data waiting to be written is bytes[0:nbytes]
78	// and then the low nbits of bits.
79	bits            uint64
80	nbits           uint
81	bytes           [bufferSize]byte
82	codegenFreq     [codegenCodeCount]int32
83	nbytes          int
84	literalFreq     []int32
85	offsetFreq      []int32
86	codegen         []uint8
87	literalEncoding *huffmanEncoder
88	offsetEncoding  *huffmanEncoder
89	codegenEncoding *huffmanEncoder
90	err             error
91}
92
93func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
94	return &huffmanBitWriter{
95		writer:          w,
96		literalFreq:     make([]int32, maxNumLit),
97		offsetFreq:      make([]int32, offsetCodeCount),
98		codegen:         make([]uint8, maxNumLit+offsetCodeCount+1),
99		literalEncoding: newHuffmanEncoder(maxNumLit),
100		codegenEncoding: newHuffmanEncoder(codegenCodeCount),
101		offsetEncoding:  newHuffmanEncoder(offsetCodeCount),
102	}
103}
104
105func (w *huffmanBitWriter) reset(writer io.Writer) {
106	w.writer = writer
107	w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
108	w.bytes = [bufferSize]byte{}
109}
110
111func (w *huffmanBitWriter) flush() {
112	if w.err != nil {
113		w.nbits = 0
114		return
115	}
116	n := w.nbytes
117	for w.nbits != 0 {
118		w.bytes[n] = byte(w.bits)
119		w.bits >>= 8
120		if w.nbits > 8 { // Avoid underflow
121			w.nbits -= 8
122		} else {
123			w.nbits = 0
124		}
125		n++
126	}
127	w.bits = 0
128	w.write(w.bytes[:n])
129	w.nbytes = 0
130}
131
132func (w *huffmanBitWriter) write(b []byte) {
133	if w.err != nil {
134		return
135	}
136	_, w.err = w.writer.Write(b)
137}
138
139func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
140	if w.err != nil {
141		return
142	}
143	w.bits |= uint64(b) << w.nbits
144	w.nbits += nb
145	if w.nbits >= 48 {
146		bits := w.bits
147		w.bits >>= 48
148		w.nbits -= 48
149		n := w.nbytes
150		bytes := w.bytes[n : n+6]
151		bytes[0] = byte(bits)
152		bytes[1] = byte(bits >> 8)
153		bytes[2] = byte(bits >> 16)
154		bytes[3] = byte(bits >> 24)
155		bytes[4] = byte(bits >> 32)
156		bytes[5] = byte(bits >> 40)
157		n += 6
158		if n >= bufferFlushSize {
159			w.write(w.bytes[:n])
160			n = 0
161		}
162		w.nbytes = n
163	}
164}
165
166func (w *huffmanBitWriter) writeBytes(bytes []byte) {
167	if w.err != nil {
168		return
169	}
170	n := w.nbytes
171	if w.nbits&7 != 0 {
172		w.err = InternalError("writeBytes with unfinished bits")
173		return
174	}
175	for w.nbits != 0 {
176		w.bytes[n] = byte(w.bits)
177		w.bits >>= 8
178		w.nbits -= 8
179		n++
180	}
181	if n != 0 {
182		w.write(w.bytes[:n])
183	}
184	w.nbytes = 0
185	w.write(bytes)
186}
187
188// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
189// the literal and offset lengths arrays (which are concatenated into a single
190// array).  This method generates that run-length encoding.
191//
192// The result is written into the codegen array, and the frequencies
193// of each code is written into the codegenFreq array.
194// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
195// information. Code badCode is an end marker
196//
197//  numLiterals      The number of literals in literalEncoding
198//  numOffsets       The number of offsets in offsetEncoding
199//  litenc, offenc   The literal and offset encoder to use
200func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
201	for i := range w.codegenFreq {
202		w.codegenFreq[i] = 0
203	}
204	// Note that we are using codegen both as a temporary variable for holding
205	// a copy of the frequencies, and as the place where we put the result.
206	// This is fine because the output is always shorter than the input used
207	// so far.
208	codegen := w.codegen // cache
209	// Copy the concatenated code sizes to codegen. Put a marker at the end.
210	cgnl := codegen[:numLiterals]
211	for i := range cgnl {
212		cgnl[i] = uint8(litEnc.codes[i].len)
213	}
214
215	cgnl = codegen[numLiterals : numLiterals+numOffsets]
216	for i := range cgnl {
217		cgnl[i] = uint8(offEnc.codes[i].len)
218	}
219	codegen[numLiterals+numOffsets] = badCode
220
221	size := codegen[0]
222	count := 1
223	outIndex := 0
224	for inIndex := 1; size != badCode; inIndex++ {
225		// INVARIANT: We have seen "count" copies of size that have not yet
226		// had output generated for them.
227		nextSize := codegen[inIndex]
228		if nextSize == size {
229			count++
230			continue
231		}
232		// We need to generate codegen indicating "count" of size.
233		if size != 0 {
234			codegen[outIndex] = size
235			outIndex++
236			w.codegenFreq[size]++
237			count--
238			for count >= 3 {
239				n := 6
240				if n > count {
241					n = count
242				}
243				codegen[outIndex] = 16
244				outIndex++
245				codegen[outIndex] = uint8(n - 3)
246				outIndex++
247				w.codegenFreq[16]++
248				count -= n
249			}
250		} else {
251			for count >= 11 {
252				n := 138
253				if n > count {
254					n = count
255				}
256				codegen[outIndex] = 18
257				outIndex++
258				codegen[outIndex] = uint8(n - 11)
259				outIndex++
260				w.codegenFreq[18]++
261				count -= n
262			}
263			if count >= 3 {
264				// count >= 3 && count <= 10
265				codegen[outIndex] = 17
266				outIndex++
267				codegen[outIndex] = uint8(count - 3)
268				outIndex++
269				w.codegenFreq[17]++
270				count = 0
271			}
272		}
273		count--
274		for ; count >= 0; count-- {
275			codegen[outIndex] = size
276			outIndex++
277			w.codegenFreq[size]++
278		}
279		// Set up invariant for next time through the loop.
280		size = nextSize
281		count = 1
282	}
283	// Marker indicating the end of the codegen.
284	codegen[outIndex] = badCode
285}
286
287// dynamicSize returns the size of dynamically encoded data in bits.
288func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
289	numCodegens = len(w.codegenFreq)
290	for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
291		numCodegens--
292	}
293	header := 3 + 5 + 5 + 4 + (3 * numCodegens) +
294		w.codegenEncoding.bitLength(w.codegenFreq[:]) +
295		int(w.codegenFreq[16])*2 +
296		int(w.codegenFreq[17])*3 +
297		int(w.codegenFreq[18])*7
298	size = header +
299		litEnc.bitLength(w.literalFreq) +
300		offEnc.bitLength(w.offsetFreq) +
301		extraBits
302
303	return size, numCodegens
304}
305
306// fixedSize returns the size of dynamically encoded data in bits.
307func (w *huffmanBitWriter) fixedSize(extraBits int) int {
308	return 3 +
309		fixedLiteralEncoding.bitLength(w.literalFreq) +
310		fixedOffsetEncoding.bitLength(w.offsetFreq) +
311		extraBits
312}
313
314// storedSize calculates the stored size, including header.
315// The function returns the size in bits and whether the block
316// fits inside a single block.
317func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
318	if in == nil {
319		return 0, false
320	}
321	if len(in) <= maxStoreBlockSize {
322		return (len(in) + 5) * 8, true
323	}
324	return 0, false
325}
326
327func (w *huffmanBitWriter) writeCode(c hcode) {
328	if w.err != nil {
329		return
330	}
331	w.bits |= uint64(c.code) << w.nbits
332	w.nbits += uint(c.len)
333	if w.nbits >= 48 {
334		bits := w.bits
335		w.bits >>= 48
336		w.nbits -= 48
337		n := w.nbytes
338		bytes := w.bytes[n : n+6]
339		bytes[0] = byte(bits)
340		bytes[1] = byte(bits >> 8)
341		bytes[2] = byte(bits >> 16)
342		bytes[3] = byte(bits >> 24)
343		bytes[4] = byte(bits >> 32)
344		bytes[5] = byte(bits >> 40)
345		n += 6
346		if n >= bufferFlushSize {
347			w.write(w.bytes[:n])
348			n = 0
349		}
350		w.nbytes = n
351	}
352}
353
354// Write the header of a dynamic Huffman block to the output stream.
355//
356//  numLiterals  The number of literals specified in codegen
357//  numOffsets   The number of offsets specified in codegen
358//  numCodegens  The number of codegens used in codegen
359func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
360	if w.err != nil {
361		return
362	}
363	var firstBits int32 = 4
364	if isEof {
365		firstBits = 5
366	}
367	w.writeBits(firstBits, 3)
368	w.writeBits(int32(numLiterals-257), 5)
369	w.writeBits(int32(numOffsets-1), 5)
370	w.writeBits(int32(numCodegens-4), 4)
371
372	for i := 0; i < numCodegens; i++ {
373		value := uint(w.codegenEncoding.codes[codegenOrder[i]].len)
374		w.writeBits(int32(value), 3)
375	}
376
377	i := 0
378	for {
379		var codeWord int = int(w.codegen[i])
380		i++
381		if codeWord == badCode {
382			break
383		}
384		w.writeCode(w.codegenEncoding.codes[uint32(codeWord)])
385
386		switch codeWord {
387		case 16:
388			w.writeBits(int32(w.codegen[i]), 2)
389			i++
390			break
391		case 17:
392			w.writeBits(int32(w.codegen[i]), 3)
393			i++
394			break
395		case 18:
396			w.writeBits(int32(w.codegen[i]), 7)
397			i++
398			break
399		}
400	}
401}
402
403func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
404	if w.err != nil {
405		return
406	}
407	var flag int32
408	if isEof {
409		flag = 1
410	}
411	w.writeBits(flag, 3)
412	w.flush()
413	w.writeBits(int32(length), 16)
414	w.writeBits(int32(^uint16(length)), 16)
415}
416
417func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
418	if w.err != nil {
419		return
420	}
421	// Indicate that we are a fixed Huffman block
422	var value int32 = 2
423	if isEof {
424		value = 3
425	}
426	w.writeBits(value, 3)
427}
428
429// writeBlock will write a block of tokens with the smallest encoding.
430// The original input can be supplied, and if the huffman encoded data
431// is larger than the original bytes, the data will be written as a
432// stored block.
433// If the input is nil, the tokens will always be Huffman encoded.
434func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
435	if w.err != nil {
436		return
437	}
438
439	tokens = append(tokens, endBlockMarker)
440	numLiterals, numOffsets := w.indexTokens(tokens)
441
442	var extraBits int
443	storedSize, storable := w.storedSize(input)
444	if storable {
445		// We only bother calculating the costs of the extra bits required by
446		// the length of offset fields (which will be the same for both fixed
447		// and dynamic encoding), if we need to compare those two encodings
448		// against stored encoding.
449		for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
450			// First eight length codes have extra size = 0.
451			extraBits += int(w.literalFreq[lengthCode]) * int(lengthExtraBits[lengthCode-lengthCodesStart])
452		}
453		for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
454			// First four offset codes have extra size = 0.
455			extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode])
456		}
457	}
458
459	// Figure out smallest code.
460	// Fixed Huffman baseline.
461	var literalEncoding = fixedLiteralEncoding
462	var offsetEncoding = fixedOffsetEncoding
463	var size = w.fixedSize(extraBits)
464
465	// Dynamic Huffman?
466	var numCodegens int
467
468	// Generate codegen and codegenFrequencies, which indicates how to encode
469	// the literalEncoding and the offsetEncoding.
470	w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
471	w.codegenEncoding.generate(w.codegenFreq[:], 7)
472	dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
473
474	if dynamicSize < size {
475		size = dynamicSize
476		literalEncoding = w.literalEncoding
477		offsetEncoding = w.offsetEncoding
478	}
479
480	// Stored bytes?
481	if storable && storedSize < size {
482		w.writeStoredHeader(len(input), eof)
483		w.writeBytes(input)
484		return
485	}
486
487	// Huffman.
488	if literalEncoding == fixedLiteralEncoding {
489		w.writeFixedHeader(eof)
490	} else {
491		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
492	}
493
494	// Write the tokens.
495	w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes)
496}
497
498// writeBlockDynamic encodes a block using a dynamic Huffman table.
499// This should be used if the symbols used have a disproportionate
500// histogram distribution.
501// If input is supplied and the compression savings are below 1/16th of the
502// input size the block is stored.
503func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) {
504	if w.err != nil {
505		return
506	}
507
508	tokens = append(tokens, endBlockMarker)
509	numLiterals, numOffsets := w.indexTokens(tokens)
510
511	// Generate codegen and codegenFrequencies, which indicates how to encode
512	// the literalEncoding and the offsetEncoding.
513	w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
514	w.codegenEncoding.generate(w.codegenFreq[:], 7)
515	size, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, 0)
516
517	// Store bytes, if we don't get a reasonable improvement.
518	if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
519		w.writeStoredHeader(len(input), eof)
520		w.writeBytes(input)
521		return
522	}
523
524	// Write Huffman table.
525	w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
526
527	// Write the tokens.
528	w.writeTokens(tokens, w.literalEncoding.codes, w.offsetEncoding.codes)
529}
530
531// indexTokens indexes a slice of tokens, and updates
532// literalFreq and offsetFreq, and generates literalEncoding
533// and offsetEncoding.
534// The number of literal and offset tokens is returned.
535func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets int) {
536	for i := range w.literalFreq {
537		w.literalFreq[i] = 0
538	}
539	for i := range w.offsetFreq {
540		w.offsetFreq[i] = 0
541	}
542
543	for _, t := range tokens {
544		if t < matchType {
545			w.literalFreq[t.literal()]++
546			continue
547		}
548		length := t.length()
549		offset := t.offset()
550		w.literalFreq[lengthCodesStart+lengthCode(length)]++
551		w.offsetFreq[offsetCode(offset)]++
552	}
553
554	// get the number of literals
555	numLiterals = len(w.literalFreq)
556	for w.literalFreq[numLiterals-1] == 0 {
557		numLiterals--
558	}
559	// get the number of offsets
560	numOffsets = len(w.offsetFreq)
561	for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
562		numOffsets--
563	}
564	if numOffsets == 0 {
565		// We haven't found a single match. If we want to go with the dynamic encoding,
566		// we should count at least one offset to be sure that the offset huffman tree could be encoded.
567		w.offsetFreq[0] = 1
568		numOffsets = 1
569	}
570	w.literalEncoding.generate(w.literalFreq, 15)
571	w.offsetEncoding.generate(w.offsetFreq, 15)
572	return
573}
574
575// writeTokens writes a slice of tokens to the output.
576// codes for literal and offset encoding must be supplied.
577func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
578	if w.err != nil {
579		return
580	}
581	for _, t := range tokens {
582		if t < matchType {
583			w.writeCode(leCodes[t.literal()])
584			continue
585		}
586		// Write the length
587		length := t.length()
588		lengthCode := lengthCode(length)
589		w.writeCode(leCodes[lengthCode+lengthCodesStart])
590		extraLengthBits := uint(lengthExtraBits[lengthCode])
591		if extraLengthBits > 0 {
592			extraLength := int32(length - lengthBase[lengthCode])
593			w.writeBits(extraLength, extraLengthBits)
594		}
595		// Write the offset
596		offset := t.offset()
597		offsetCode := offsetCode(offset)
598		w.writeCode(oeCodes[offsetCode])
599		extraOffsetBits := uint(offsetExtraBits[offsetCode])
600		if extraOffsetBits > 0 {
601			extraOffset := int32(offset - offsetBase[offsetCode])
602			w.writeBits(extraOffset, extraOffsetBits)
603		}
604	}
605}
606
607// huffOffset is a static offset encoder used for huffman only encoding.
608// It can be reused since we will not be encoding offset values.
609var huffOffset *huffmanEncoder
610
611func init() {
612	offsetFreq := make([]int32, offsetCodeCount)
613	offsetFreq[0] = 1
614	huffOffset = newHuffmanEncoder(offsetCodeCount)
615	huffOffset.generate(offsetFreq, 15)
616}
617
618// writeBlockHuff encodes a block of bytes as either
619// Huffman encoded literals or uncompressed bytes if the
620// results only gains very little from compression.
621func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
622	if w.err != nil {
623		return
624	}
625
626	// Clear histogram
627	for i := range w.literalFreq {
628		w.literalFreq[i] = 0
629	}
630
631	// Add everything as literals
632	histogram(input, w.literalFreq)
633
634	w.literalFreq[endBlockMarker] = 1
635
636	const numLiterals = endBlockMarker + 1
637	const numOffsets = 1
638
639	w.literalEncoding.generate(w.literalFreq, 15)
640
641	// Figure out smallest code.
642	// Always use dynamic Huffman or Store
643	var numCodegens int
644
645	// Generate codegen and codegenFrequencies, which indicates how to encode
646	// the literalEncoding and the offsetEncoding.
647	w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
648	w.codegenEncoding.generate(w.codegenFreq[:], 7)
649	size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0)
650
651	// Store bytes, if we don't get a reasonable improvement.
652	if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
653		w.writeStoredHeader(len(input), eof)
654		w.writeBytes(input)
655		return
656	}
657
658	// Huffman.
659	w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
660	encoding := w.literalEncoding.codes[:257]
661	n := w.nbytes
662	for _, t := range input {
663		// Bitwriting inlined, ~30% speedup
664		c := encoding[t]
665		w.bits |= uint64(c.code) << w.nbits
666		w.nbits += uint(c.len)
667		if w.nbits < 48 {
668			continue
669		}
670		// Store 6 bytes
671		bits := w.bits
672		w.bits >>= 48
673		w.nbits -= 48
674		bytes := w.bytes[n : n+6]
675		bytes[0] = byte(bits)
676		bytes[1] = byte(bits >> 8)
677		bytes[2] = byte(bits >> 16)
678		bytes[3] = byte(bits >> 24)
679		bytes[4] = byte(bits >> 32)
680		bytes[5] = byte(bits >> 40)
681		n += 6
682		if n < bufferFlushSize {
683			continue
684		}
685		w.write(w.bytes[:n])
686		if w.err != nil {
687			return // Return early in the event of write failures
688		}
689		n = 0
690	}
691	w.nbytes = n
692	w.writeCode(encoding[endBlockMarker])
693}
694
695// histogram accumulates a histogram of b in h.
696//
697// len(h) must be >= 256, and h's elements must be all zeroes.
698func histogram(b []byte, h []int32) {
699	h = h[:256]
700	for _, t := range b {
701		h[t]++
702	}
703}
704