1// Package multihash is the Go implementation of
2// https://github.com/multiformats/multihash, or self-describing
3// hashes.
4package multihash
5
6import (
7	"encoding/hex"
8	"errors"
9	"fmt"
10	"math"
11
12	b58 "github.com/mr-tron/base58/base58"
13	"github.com/multiformats/go-varint"
14)
15
// Sentinel errors returned by this package.
//
// ErrUnknownCode is returned by Cast and Encode when ValidCode rejects the
// hash code. ErrTooShort is returned by the parser when the input buffer is
// shorter than two bytes. ErrInvalidMultihash is returned by FromB58String
// when the input cannot be base58-decoded.
//
// NOTE(review): ErrTooLong and ErrLenNotSupported do not appear to be
// returned by the code in this file — confirm whether callers still need
// them.
var (
	ErrUnknownCode      = errors.New("unknown multihash code")
	ErrTooShort         = errors.New("multihash too short. must be >= 2 bytes")
	ErrTooLong          = errors.New("multihash too long. must be < 129 bytes")
	ErrLenNotSupported  = errors.New("multihash does not yet support digests longer than 127 bytes")
	ErrInvalidMultihash = errors.New("input isn't valid multihash")

	// ErrVarintBufferShort and ErrVarintTooLong are returned by uvarint when
	// the varint decoder reports a zero or a negative byte count.
	ErrVarintBufferShort = errors.New("uvarint: buffer too small")
	ErrVarintTooLong     = errors.New("uvarint: varint too big (max 64bit)")
)
27
28// ErrInconsistentLen is returned when a decoded multihash has an inconsistent length
29type ErrInconsistentLen struct {
30	dm *DecodedMultihash
31}
32
33func (e ErrInconsistentLen) Error() string {
34	return fmt.Sprintf("multihash length inconsistent: expected %d, got %d", e.dm.Length, len(e.dm.Digest))
35}
36
// Hash function codes.
//
// Each constant is the numeric code that Encode writes as the leading varint
// of a multihash. Names and Codes translate between these codes and their
// string names. ID, SHA3 and MURMUR3 are aliases of IDENTITY, SHA3_512 and
// MURMUR3_128 respectively.
//
// NOTE(review): the values presumably follow the multiformats multicodec
// table — verify against it before adding or changing a code.
// NOTE(review): POSEIDON_BLS12_381_A1_FC1 is registered in Names and Codes
// under the string "poseidon-bls12_381-a2-fc1" (a2, not a1) — confirm which
// spelling is intended for the identifier.
const (
	IDENTITY = 0x00
	// Deprecated: use IDENTITY
	ID         = IDENTITY
	SHA1       = 0x11
	SHA2_256   = 0x12
	SHA2_512   = 0x13
	SHA3_224   = 0x17
	SHA3_256   = 0x16
	SHA3_384   = 0x15
	SHA3_512   = 0x14
	SHA3       = SHA3_512
	KECCAK_224 = 0x1A
	KECCAK_256 = 0x1B
	KECCAK_384 = 0x1C
	KECCAK_512 = 0x1D

	SHAKE_128 = 0x18
	SHAKE_256 = 0x19

	// The BLAKE2 constants delimit inclusive code ranges rather than single
	// functions: init registers one code per digest size, starting at one
	// byte for the *_MIN code (64 blake2b codes, 32 blake2s codes).
	BLAKE2B_MIN = 0xb201
	BLAKE2B_MAX = 0xb240
	BLAKE2S_MIN = 0xb241
	BLAKE2S_MAX = 0xb260

	MD5 = 0xd5

	DBL_SHA2_256 = 0x56

	MURMUR3_128 = 0x22
	// Deprecated: use MURMUR3_128
	MURMUR3 = MURMUR3_128

	SHA2_256_TRUNC254_PADDED  = 0x1012
	X11                       = 0x1100
	POSEIDON_BLS12_381_A1_FC1 = 0xb401
)
75
76func init() {
77	// Add blake2b (64 codes)
78	for c := uint64(BLAKE2B_MIN); c <= BLAKE2B_MAX; c++ {
79		n := c - BLAKE2B_MIN + 1
80		name := fmt.Sprintf("blake2b-%d", n*8)
81		Names[name] = c
82		Codes[c] = name
83		DefaultLengths[c] = int(n)
84	}
85
86	// Add blake2s (32 codes)
87	for c := uint64(BLAKE2S_MIN); c <= BLAKE2S_MAX; c++ {
88		n := c - BLAKE2S_MIN + 1
89		name := fmt.Sprintf("blake2s-%d", n*8)
90		Names[name] = c
91		Codes[c] = name
92		DefaultLengths[c] = int(n)
93	}
94}
95
// Names maps the string name of a hash function to its numeric code.
//
// "sha3" and "sha3-512" both map to SHA3_512. The "blake2b-*" and
// "blake2s-*" names are not listed in this literal; init adds them when the
// package is initialized. EncodeName looks names up in this map.
var Names = map[string]uint64{
	"identity":                  IDENTITY,
	"sha1":                      SHA1,
	"sha2-256":                  SHA2_256,
	"sha2-512":                  SHA2_512,
	"sha3":                      SHA3_512,
	"sha3-224":                  SHA3_224,
	"sha3-256":                  SHA3_256,
	"sha3-384":                  SHA3_384,
	"sha3-512":                  SHA3_512,
	"dbl-sha2-256":              DBL_SHA2_256,
	"murmur3-128":               MURMUR3_128,
	"keccak-224":                KECCAK_224,
	"keccak-256":                KECCAK_256,
	"keccak-384":                KECCAK_384,
	"keccak-512":                KECCAK_512,
	"shake-128":                 SHAKE_128,
	"shake-256":                 SHAKE_256,
	"sha2-256-trunc254-padded":  SHA2_256_TRUNC254_PADDED,
	"x11":                       X11,
	"md5":                       MD5,
	"poseidon-bls12_381-a2-fc1": POSEIDON_BLS12_381_A1_FC1,
}
120
// Codes maps a hash code to its name.
//
// ValidCode treats exactly the keys of this map as valid codes, and Decode
// uses it to fill DecodedMultihash.Name. The BLAKE2 codes are not listed in
// this literal; init adds them when the package is initialized.
var Codes = map[uint64]string{
	IDENTITY:                  "identity",
	SHA1:                      "sha1",
	SHA2_256:                  "sha2-256",
	SHA2_512:                  "sha2-512",
	SHA3_224:                  "sha3-224",
	SHA3_256:                  "sha3-256",
	SHA3_384:                  "sha3-384",
	SHA3_512:                  "sha3-512",
	DBL_SHA2_256:              "dbl-sha2-256",
	MURMUR3_128:               "murmur3-128",
	KECCAK_224:                "keccak-224",
	KECCAK_256:                "keccak-256",
	KECCAK_384:                "keccak-384",
	KECCAK_512:                "keccak-512",
	SHAKE_128:                 "shake-128",
	SHAKE_256:                 "shake-256",
	SHA2_256_TRUNC254_PADDED:  "sha2-256-trunc254-padded",
	X11:                       "x11",
	POSEIDON_BLS12_381_A1_FC1: "poseidon-bls12_381-a2-fc1",
	MD5:                       "md5",
}
144
// DefaultLengths maps a hash code to its default digest length in bytes.
//
// The BLAKE2 codes are not listed in this literal; init adds one entry per
// code when the package is initialized. SHA2_256_TRUNC254_PADDED and
// POSEIDON_BLS12_381_A1_FC1 have no entry here.
//
// NOTE(review): IDENTITY is -1, presumably meaning "no fixed length" —
// confirm against the code that consumes this map.
// NOTE(review): MURMUR3_128 is listed as 4 bytes although its name suggests
// a 128-bit digest — confirm against the hashing implementation.
var DefaultLengths = map[uint64]int{
	IDENTITY:     -1,
	SHA1:         20,
	SHA2_256:     32,
	SHA2_512:     64,
	SHA3_224:     28,
	SHA3_256:     32,
	SHA3_384:     48,
	SHA3_512:     64,
	DBL_SHA2_256: 32,
	KECCAK_224:   28,
	KECCAK_256:   32,
	MURMUR3_128:  4,
	KECCAK_384:   48,
	KECCAK_512:   64,
	SHAKE_128:    32,
	SHAKE_256:    64,
	X11:          64,
	MD5:          16,
}
166
167func uvarint(buf []byte) (uint64, []byte, error) {
168	n, c, err := varint.FromUvarint(buf)
169	if err != nil {
170		return n, buf, err
171	}
172
173	if c == 0 {
174		return n, buf, ErrVarintBufferShort
175	} else if c < 0 {
176		return n, buf[-c:], ErrVarintTooLong
177	} else {
178		return n, buf[c:], nil
179	}
180}
181
// DecodedMultihash represents a parsed multihash and allows
// easy access to the different parts of a multihash.
//
// As filled in by Decode: Code is the hash function code, Name is
// Codes[Code] (empty when the code is not in Codes), Digest is the digest
// that follows the code and length prefix, and Length is len(Digest).
type DecodedMultihash struct {
	Code   uint64
	Name   string
	Length int    // Length is just int as it is type of len() operator
	Digest []byte // Digest holds the raw digest bytes, without the code and length prefix
}
190
// Multihash is a byte slice laid out as
// <hash function code><digest size><hash function output>.
// See the spec for more information.
type Multihash []byte

// HexString returns the bytes of the multihash encoded as lowercase
// hexadecimal text.
func (m *Multihash) HexString() string {
	raw := []byte(*m)
	return hex.EncodeToString(raw)
}

// String returns the same text as HexString.
//
// Because the receiver is a pointer, only *Multihash satisfies fmt.Stringer;
// a plain Multihash value handed to fmt is formatted as a byte slice.
func (m *Multihash) String() string {
	return m.HexString()
}
205
206// FromHexString parses a hex-encoded multihash.
207func FromHexString(s string) (Multihash, error) {
208	b, err := hex.DecodeString(s)
209	if err != nil {
210		return Multihash{}, err
211	}
212
213	return Cast(b)
214}
215
216// B58String returns the B58-encoded representation of a multihash.
217func (m Multihash) B58String() string {
218	return b58.Encode([]byte(m))
219}
220
221// FromB58String parses a B58-encoded multihash.
222func FromB58String(s string) (m Multihash, err error) {
223	b, err := b58.Decode(s)
224	if err != nil {
225		return Multihash{}, ErrInvalidMultihash
226	}
227
228	return Cast(b)
229}
230
231// Cast casts a buffer onto a multihash, and returns an error
232// if it does not work.
233func Cast(buf []byte) (Multihash, error) {
234	dm, err := Decode(buf)
235	if err != nil {
236		return Multihash{}, err
237	}
238
239	if !ValidCode(dm.Code) {
240		return Multihash{}, ErrUnknownCode
241	}
242
243	return Multihash(buf), nil
244}
245
246// Decode parses multihash bytes into a DecodedMultihash.
247func Decode(buf []byte) (*DecodedMultihash, error) {
248	rlen, code, hdig, err := readMultihashFromBuf(buf)
249	if err != nil {
250		return nil, err
251	}
252
253	dm := &DecodedMultihash{
254		Code:   code,
255		Name:   Codes[code],
256		Length: len(hdig),
257		Digest: hdig,
258	}
259
260	if len(buf) != rlen {
261		return nil, ErrInconsistentLen{dm}
262	}
263
264	return dm, nil
265}
266
267// Encode a hash digest along with the specified function code.
268// Note: the length is derived from the length of the digest itself.
269func Encode(buf []byte, code uint64) ([]byte, error) {
270	if !ValidCode(code) {
271		return nil, ErrUnknownCode
272	}
273
274	newBuf := make([]byte, varint.UvarintSize(code)+varint.UvarintSize(uint64(len(buf)))+len(buf))
275	n := varint.PutUvarint(newBuf, code)
276	n += varint.PutUvarint(newBuf[n:], uint64(len(buf)))
277
278	copy(newBuf[n:], buf)
279	return newBuf, nil
280}
281
282// EncodeName is like Encode() but providing a string name
283// instead of a numeric code. See Names for allowed values.
284func EncodeName(buf []byte, name string) ([]byte, error) {
285	return Encode(buf, Names[name])
286}
287
288// ValidCode checks whether a multihash code is valid.
289func ValidCode(code uint64) bool {
290	_, ok := Codes[code]
291	return ok
292}
293
294// readMultihashFromBuf reads a multihash from the given buffer, returning the
295// individual pieces of the multihash.
296// Note: the returned digest is a slice over the passed in data and should be
297// copied if the buffer will be reused
298func readMultihashFromBuf(buf []byte) (int, uint64, []byte, error) {
299	bufl := len(buf)
300	if bufl < 2 {
301		return 0, 0, nil, ErrTooShort
302	}
303
304	var err error
305	var code, length uint64
306
307	code, buf, err = uvarint(buf)
308	if err != nil {
309		return 0, 0, nil, err
310	}
311
312	length, buf, err = uvarint(buf)
313	if err != nil {
314		return 0, 0, nil, err
315	}
316
317	if length > math.MaxInt32 {
318		return 0, 0, nil, errors.New("digest too long, supporting only <= 2^31-1")
319	}
320	if int(length) > len(buf) {
321		return 0, 0, nil, errors.New("length greater than remaining number of bytes in buffer")
322	}
323
324	rlen := (bufl - len(buf)) + int(length)
325	return rlen, code, buf[:length], nil
326}
327
328// MHFromBytes reads a multihash from the given byte buffer, returning the
329// number of bytes read as well as the multihash
330func MHFromBytes(buf []byte) (int, Multihash, error) {
331	nr, _, _, err := readMultihashFromBuf(buf)
332	if err != nil {
333		return 0, nil, err
334	}
335
336	return nr, Multihash(buf[:nr]), nil
337}
338