1// Copyright 2015, Joe Tsai. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE.md file.
4
5// Package bzip2 implements the BZip2 compressed data format.
6//
7// Canonical C implementation:
8//	http://bzip.org
9//
10// Unofficial format specification:
11//	https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
12package bzip2
13
14import (
15	"fmt"
16	"hash/crc32"
17
18	"github.com/dsnet/compress/internal"
19	"github.com/dsnet/compress/internal/errors"
20)
21
22// There does not exist a formal specification of the BZip2 format. As such,
23// much of this work is derived by either reverse engineering the original C
24// source code or using secondary sources.
25//
26// Significant amounts of fuzz testing is done to ensure that outputs from
27// this package is properly decoded by the C library. Furthermore, we test that
28// both this package and the C library agree about what inputs are invalid.
29//
30// Compression stack:
31//	Run-length encoding 1     (RLE1)
32//	Burrows-Wheeler transform (BWT)
33//	Move-to-front transform   (MTF)
34//	Run-length encoding 2     (RLE2)
35//	Prefix encoding           (PE)
36//
37// References:
38//	http://bzip.org/
39//	https://en.wikipedia.org/wiki/Bzip2
40//	https://code.google.com/p/jbzip2/
41
42const (
43	BestSpeed          = 1
44	BestCompression    = 9
45	DefaultCompression = 6
46)
47
48const (
49	hdrMagic = 0x425a         // Hex of "BZ"
50	blkMagic = 0x314159265359 // BCD of PI
51	endMagic = 0x177245385090 // BCD of sqrt(PI)
52
53	blockSize = 100000
54)
55
56func errorf(c int, f string, a ...interface{}) error {
57	return errors.Error{Code: c, Pkg: "bzip2", Msg: fmt.Sprintf(f, a...)}
58}
59
60func panicf(c int, f string, a ...interface{}) {
61	errors.Panic(errorf(c, f, a...))
62}
63
64// errWrap converts a lower-level errors.Error to be one from this package.
65// The replaceCode passed in will be used to replace the code for any errors
66// with the errors.Invalid code.
67//
68// For the Reader, set this to errors.Corrupted.
69// For the Writer, set this to errors.Internal.
70func errWrap(err error, replaceCode int) error {
71	if cerr, ok := err.(errors.Error); ok {
72		if errors.IsInvalid(cerr) {
73			cerr.Code = replaceCode
74		}
75		err = errorf(cerr.Code, "%s", cerr.Msg)
76	}
77	return err
78}
79
80var errClosed = errorf(errors.Closed, "")
81
82// crc computes the CRC-32 used by BZip2.
83//
84// The CRC-32 computation in bzip2 treats bytes as having bits in big-endian
85// order. That is, the MSB is read before the LSB. Thus, we can use the
86// standard library version of CRC-32 IEEE with some minor adjustments.
87//
88// The byte array is used as an intermediate buffer to swap the bits of every
89// byte of the input.
90type crc struct {
91	val uint32
92	buf [256]byte
93}
94
95// update computes the CRC-32 of appending buf to c.
96func (c *crc) update(buf []byte) {
97	cval := internal.ReverseUint32(c.val)
98	for len(buf) > 0 {
99		n := len(buf)
100		if n > len(c.buf) {
101			n = len(c.buf)
102		}
103		for i, b := range buf[:n] {
104			c.buf[i] = internal.ReverseLUT[b]
105		}
106		cval = crc32.Update(cval, crc32.IEEETable, c.buf[:n])
107		buf = buf[n:]
108	}
109	c.val = internal.ReverseUint32(cval)
110}
111