1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package cache
6
7import (
8	"bytes"
9	"crypto/sha256"
10	"fmt"
11	"hash"
12	"io"
13	"os"
14	"sync"
15)
16
// debugHash controls whether hashing operations are traced to standard error.
var debugHash bool // set when GODEBUG=gocachehash=1

// HashSize is the size, in bytes, of a hash.
const HashSize = 32
21
// A Hash gives access to the canonical hash function used to index the cache.
// The current implementation is salted SHA256, but clients must not rely on that.
type Hash struct {
	// h is the underlying running hash.
	h hash.Hash
	// name labels this hash in debugging output.
	name string
	// buf, when non-nil, accumulates a copy of the written data for verify mode.
	buf *bytes.Buffer
}
29
// hashSalt is a salt string added to the beginning of every hash
// created by NewHash. Using the golangci-lint version makes sure that different
// versions of the command do not address the same cache
// entries, so that a bug in one version does not affect the execution
// of other versions. This salt will result in additional ActionID files
// in the cache, but not additional copies of the large output files,
// which are still addressed by unsalted SHA256.
var hashSalt []byte

// SetSalt sets the salt that NewHash writes at the start of every new hash.
//
// The bytes of b are copied, so modifying b after the call does not change
// the salt. SetSalt performs no locking.
// NOTE(review): presumably it is called once during setup, before any hash
// is created — confirm against callers.
func SetSalt(b []byte) {
	// Copy rather than alias the caller's backing array: the salt is
	// package-level state and must not change behind our back.
	hashSalt = append([]byte(nil), b...)
}
42
43// Subkey returns an action ID corresponding to mixing a parent
44// action ID with a string description of the subkey.
45func Subkey(parent ActionID, desc string) (ActionID, error) {
46	h := sha256.New()
47	const subkeyPrefix = "subkey:"
48	if n, err := h.Write([]byte(subkeyPrefix)); n != len(subkeyPrefix) {
49		return ActionID{}, fmt.Errorf("wrote %d/%d bytes of subkey prefix with error %s", n, len(subkeyPrefix), err)
50	}
51	if n, err := h.Write(parent[:]); n != len(parent) {
52		return ActionID{}, fmt.Errorf("wrote %d/%d bytes of parent with error %s", n, len(parent), err)
53	}
54	if n, err := h.Write([]byte(desc)); n != len(desc) {
55		return ActionID{}, fmt.Errorf("wrote %d/%d bytes of desc with error %s", n, len(desc), err)
56	}
57
58	var out ActionID
59	h.Sum(out[:0])
60	if debugHash {
61		fmt.Fprintf(os.Stderr, "HASH subkey %x %q = %x\n", parent, desc, out)
62	}
63	if verify {
64		hashDebug.Lock()
65		hashDebug.m[out] = fmt.Sprintf("subkey %x %q", parent, desc)
66		hashDebug.Unlock()
67	}
68	return out, nil
69}
70
71// NewHash returns a new Hash.
72// The caller is expected to Write data to it and then call Sum.
73func NewHash(name string) (*Hash, error) {
74	h := &Hash{h: sha256.New(), name: name}
75	if debugHash {
76		fmt.Fprintf(os.Stderr, "HASH[%s]\n", h.name)
77	}
78	if n, err := h.Write(hashSalt); n != len(hashSalt) {
79		return nil, fmt.Errorf("wrote %d/%d bytes of hash salt with error %s", n, len(hashSalt), err)
80	}
81	if verify {
82		h.buf = new(bytes.Buffer)
83	}
84	return h, nil
85}
86
87// Write writes data to the running hash.
88func (h *Hash) Write(b []byte) (int, error) {
89	if debugHash {
90		fmt.Fprintf(os.Stderr, "HASH[%s]: %q\n", h.name, b)
91	}
92	if h.buf != nil {
93		h.buf.Write(b)
94	}
95	return h.h.Write(b)
96}
97
98// Sum returns the hash of the data written previously.
99func (h *Hash) Sum() [HashSize]byte {
100	var out [HashSize]byte
101	h.h.Sum(out[:0])
102	if debugHash {
103		fmt.Fprintf(os.Stderr, "HASH[%s]: %x\n", h.name, out)
104	}
105	if h.buf != nil {
106		hashDebug.Lock()
107		if hashDebug.m == nil {
108			hashDebug.m = make(map[[HashSize]byte]string)
109		}
110		hashDebug.m[out] = h.buf.String()
111		hashDebug.Unlock()
112	}
113	return out
114}
115
116// In GODEBUG=gocacheverify=1 mode,
117// hashDebug holds the input to every computed hash ID,
118// so that we can work backward from the ID involved in a
119// cache entry mismatch to a description of what should be there.
120var hashDebug struct {
121	sync.Mutex
122	m map[[HashSize]byte]string
123}
124
125// reverseHash returns the input used to compute the hash id.
126func reverseHash(id [HashSize]byte) string {
127	hashDebug.Lock()
128	s := hashDebug.m[id]
129	hashDebug.Unlock()
130	return s
131}
132
133var hashFileCache struct {
134	sync.Mutex
135	m map[string][HashSize]byte
136}
137
138// FileHash returns the hash of the named file.
139// It caches repeated lookups for a given file,
140// and the cache entry for a file can be initialized
141// using SetFileHash.
142// The hash used by FileHash is not the same as
143// the hash used by NewHash.
144func FileHash(file string) ([HashSize]byte, error) {
145	hashFileCache.Lock()
146	out, ok := hashFileCache.m[file]
147	hashFileCache.Unlock()
148
149	if ok {
150		return out, nil
151	}
152
153	h := sha256.New()
154	f, err := os.Open(file)
155	if err != nil {
156		if debugHash {
157			fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err)
158		}
159		return [HashSize]byte{}, err
160	}
161	_, err = io.Copy(h, f)
162	f.Close()
163	if err != nil {
164		if debugHash {
165			fmt.Fprintf(os.Stderr, "HASH %s: %v\n", file, err)
166		}
167		return [HashSize]byte{}, err
168	}
169	h.Sum(out[:0])
170	if debugHash {
171		fmt.Fprintf(os.Stderr, "HASH %s: %x\n", file, out)
172	}
173
174	SetFileHash(file, out)
175	return out, nil
176}
177
178// SetFileHash sets the hash returned by FileHash for file.
179func SetFileHash(file string, sum [HashSize]byte) {
180	hashFileCache.Lock()
181	if hashFileCache.m == nil {
182		hashFileCache.m = make(map[string][HashSize]byte)
183	}
184	hashFileCache.m[file] = sum
185	hashFileCache.Unlock()
186}
187