1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package cache implements a build artifact cache.
6//
7// This package is a slightly modified fork of Go's
8// cmd/go/internal/cache package.
9package cache
10
11import (
12	"bytes"
13	"crypto/sha256"
14	"encoding/hex"
15	"errors"
16	"fmt"
17	"io"
18	"io/ioutil"
19	"os"
20	"path/filepath"
21	"strconv"
22	"strings"
23	"time"
24
25	"honnef.co/go/tools/internal/renameio"
26)
27
28// An ActionID is a cache action key, the hash of a complete description of a
29// repeatable computation (command line, environment variables,
30// input file contents, executable contents).
31type ActionID [HashSize]byte
32
33// An OutputID is a cache output key, the hash of an output of a computation.
34type OutputID [HashSize]byte
35
36// A Cache is a package cache, backed by a file system directory tree.
37type Cache struct {
38	dir string
39	now func() time.Time
40}
41
42// Open opens and returns the cache in the given directory.
43//
44// It is safe for multiple processes on a single machine to use the
45// same cache directory in a local file system simultaneously.
46// They will coordinate using operating system file locks and may
47// duplicate effort but will not corrupt the cache.
48//
49// However, it is NOT safe for multiple processes on different machines
50// to share a cache directory (for example, if the directory were stored
51// in a network file system). File locking is notoriously unreliable in
52// network file systems and may not suffice to protect the cache.
53//
54func Open(dir string) (*Cache, error) {
55	info, err := os.Stat(dir)
56	if err != nil {
57		return nil, err
58	}
59	if !info.IsDir() {
60		return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
61	}
62	for i := 0; i < 256; i++ {
63		name := filepath.Join(dir, fmt.Sprintf("%02x", i))
64		if err := os.MkdirAll(name, 0777); err != nil {
65			return nil, err
66		}
67	}
68	c := &Cache{
69		dir: dir,
70		now: time.Now,
71	}
72	return c, nil
73}
74
75// fileName returns the name of the file corresponding to the given id.
76func (c *Cache) fileName(id [HashSize]byte, key string) string {
77	return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
78}
79
80var errMissing = errors.New("cache entry not found")
81
82const (
83	// action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
84	hexSize   = HashSize * 2
85	entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1
86)
87
88// verify controls whether to run the cache in verify mode.
89// In verify mode, the cache always returns errMissing from Get
90// but then double-checks in Put that the data being written
91// exactly matches any existing entry. This provides an easy
92// way to detect program behavior that would have been different
93// had the cache entry been returned from Get.
94//
95// verify is enabled by setting the environment variable
96// GODEBUG=gocacheverify=1.
97var verify = false
98
99// DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
100var DebugTest = false
101
102func init() { initEnv() }
103
104func initEnv() {
105	verify = false
106	debugHash = false
107	debug := strings.Split(os.Getenv("GODEBUG"), ",")
108	for _, f := range debug {
109		if f == "gocacheverify=1" {
110			verify = true
111		}
112		if f == "gocachehash=1" {
113			debugHash = true
114		}
115		if f == "gocachetest=1" {
116			DebugTest = true
117		}
118	}
119}
120
121// Get looks up the action ID in the cache,
122// returning the corresponding output ID and file size, if any.
123// Note that finding an output ID does not guarantee that the
124// saved file for that output ID is still available.
125func (c *Cache) Get(id ActionID) (Entry, error) {
126	if verify {
127		return Entry{}, errMissing
128	}
129	return c.get(id)
130}
131
132type Entry struct {
133	OutputID OutputID
134	Size     int64
135	Time     time.Time
136}
137
138// get is Get but does not respect verify mode, so that Put can use it.
139func (c *Cache) get(id ActionID) (Entry, error) {
140	missing := func() (Entry, error) {
141		return Entry{}, errMissing
142	}
143	f, err := os.Open(c.fileName(id, "a"))
144	if err != nil {
145		return missing()
146	}
147	defer f.Close()
148	entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
149	if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF {
150		return missing()
151	}
152	if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
153		return missing()
154	}
155	eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
156	eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
157	esize, entry := entry[1:1+20], entry[1+20:]
158	//lint:ignore SA4006 See https://github.com/dominikh/go-tools/issues/465
159	etime, entry := entry[1:1+20], entry[1+20:]
160	var buf [HashSize]byte
161	if _, err := hex.Decode(buf[:], eid); err != nil || buf != id {
162		return missing()
163	}
164	if _, err := hex.Decode(buf[:], eout); err != nil {
165		return missing()
166	}
167	i := 0
168	for i < len(esize) && esize[i] == ' ' {
169		i++
170	}
171	size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
172	if err != nil || size < 0 {
173		return missing()
174	}
175	i = 0
176	for i < len(etime) && etime[i] == ' ' {
177		i++
178	}
179	tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
180	if err != nil || tm < 0 {
181		return missing()
182	}
183
184	c.used(c.fileName(id, "a"))
185
186	return Entry{buf, size, time.Unix(0, tm)}, nil
187}
188
189// GetFile looks up the action ID in the cache and returns
190// the name of the corresponding data file.
191func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
192	entry, err = c.Get(id)
193	if err != nil {
194		return "", Entry{}, err
195	}
196	file = c.OutputFile(entry.OutputID)
197	info, err := os.Stat(file)
198	if err != nil || info.Size() != entry.Size {
199		return "", Entry{}, errMissing
200	}
201	return file, entry, nil
202}
203
204// GetBytes looks up the action ID in the cache and returns
205// the corresponding output bytes.
206// GetBytes should only be used for data that can be expected to fit in memory.
207func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
208	entry, err := c.Get(id)
209	if err != nil {
210		return nil, entry, err
211	}
212	data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID))
213	if sha256.Sum256(data) != entry.OutputID {
214		return nil, entry, errMissing
215	}
216	return data, entry, nil
217}
218
219// OutputFile returns the name of the cache file storing output with the given OutputID.
220func (c *Cache) OutputFile(out OutputID) string {
221	file := c.fileName(out, "d")
222	c.used(file)
223	return file
224}
225
226// Time constants for cache expiration.
227//
228// We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour),
229// to avoid causing many unnecessary inode updates. The mtimes therefore
230// roughly reflect "time of last use" but may in fact be older by at most an hour.
231//
232// We scan the cache for entries to delete at most once per trimInterval (1 day).
233//
234// When we do scan the cache, we delete entries that have not been used for
235// at least trimLimit (5 days). Statistics gathered from a month of usage by
236// Go developers found that essentially all reuse of cached entries happened
237// within 5 days of the previous reuse. See golang.org/issue/22990.
238const (
239	mtimeInterval = 1 * time.Hour
240	trimInterval  = 24 * time.Hour
241	trimLimit     = 5 * 24 * time.Hour
242)
243
244// used makes a best-effort attempt to update mtime on file,
245// so that mtime reflects cache access time.
246//
247// Because the reflection only needs to be approximate,
248// and to reduce the amount of disk activity caused by using
249// cache entries, used only updates the mtime if the current
250// mtime is more than an hour old. This heuristic eliminates
251// nearly all of the mtime updates that would otherwise happen,
252// while still keeping the mtimes useful for cache trimming.
253func (c *Cache) used(file string) {
254	info, err := os.Stat(file)
255	if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval {
256		return
257	}
258	os.Chtimes(file, c.now(), c.now())
259}
260
261// Trim removes old cache entries that are likely not to be reused.
262func (c *Cache) Trim() {
263	now := c.now()
264
265	// We maintain in dir/trim.txt the time of the last completed cache trim.
266	// If the cache has been trimmed recently enough, do nothing.
267	// This is the common case.
268	data, _ := renameio.ReadFile(filepath.Join(c.dir, "trim.txt"))
269	t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
270	if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval {
271		return
272	}
273
274	// Trim each of the 256 subdirectories.
275	// We subtract an additional mtimeInterval
276	// to account for the imprecision of our "last used" mtimes.
277	cutoff := now.Add(-trimLimit - mtimeInterval)
278	for i := 0; i < 256; i++ {
279		subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i))
280		c.trimSubdir(subdir, cutoff)
281	}
282
283	// Ignore errors from here: if we don't write the complete timestamp, the
284	// cache will appear older than it is, and we'll trim it again next time.
285	renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666)
286}
287
288// trimSubdir trims a single cache subdirectory.
289func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
290	// Read all directory entries from subdir before removing
291	// any files, in case removing files invalidates the file offset
292	// in the directory scan. Also, ignore error from f.Readdirnames,
293	// because we don't care about reporting the error and we still
294	// want to process any entries found before the error.
295	f, err := os.Open(subdir)
296	if err != nil {
297		return
298	}
299	names, _ := f.Readdirnames(-1)
300	f.Close()
301
302	for _, name := range names {
303		// Remove only cache entries (xxxx-a and xxxx-d).
304		if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") {
305			continue
306		}
307		entry := filepath.Join(subdir, name)
308		info, err := os.Stat(entry)
309		if err == nil && info.ModTime().Before(cutoff) {
310			os.Remove(entry)
311		}
312	}
313}
314
315// putIndexEntry adds an entry to the cache recording that executing the action
316// with the given id produces an output with the given output id (hash) and size.
317func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
318	// Note: We expect that for one reason or another it may happen
319	// that repeating an action produces a different output hash
320	// (for example, if the output contains a time stamp or temp dir name).
321	// While not ideal, this is also not a correctness problem, so we
322	// don't make a big deal about it. In particular, we leave the action
323	// cache entries writable specifically so that they can be overwritten.
324	//
325	// Setting GODEBUG=gocacheverify=1 does make a big deal:
326	// in verify mode we are double-checking that the cache entries
327	// are entirely reproducible. As just noted, this may be unrealistic
328	// in some cases but the check is also useful for shaking out real bugs.
329	entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())
330
331	if verify && allowVerify {
332		old, err := c.get(id)
333		if err == nil && (old.OutputID != out || old.Size != size) {
334			// panic to show stack trace, so we can see what code is generating this cache entry.
335			msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size)
336			panic(msg)
337		}
338	}
339	file := c.fileName(id, "a")
340
341	// Copy file to cache directory.
342	mode := os.O_WRONLY | os.O_CREATE
343	f, err := os.OpenFile(file, mode, 0666)
344	if err != nil {
345		return err
346	}
347	_, err = f.WriteString(entry)
348	if err == nil {
349		// Truncate the file only *after* writing it.
350		// (This should be a no-op, but truncate just in case of previous corruption.)
351		//
352		// This differs from ioutil.WriteFile, which truncates to 0 *before* writing
353		// via os.O_TRUNC. Truncating only after writing ensures that a second write
354		// of the same content to the same file is idempotent, and does not — even
355		// temporarily! — undo the effect of the first write.
356		err = f.Truncate(int64(len(entry)))
357	}
358	if closeErr := f.Close(); err == nil {
359		err = closeErr
360	}
361	if err != nil {
362		// TODO(bcmills): This Remove potentially races with another go command writing to file.
363		// Can we eliminate it?
364		os.Remove(file)
365		return err
366	}
367	os.Chtimes(file, c.now(), c.now()) // mainly for tests
368
369	return nil
370}
371
372// Put stores the given output in the cache as the output for the action ID.
373// It may read file twice. The content of file must not change between the two passes.
374func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
375	return c.put(id, file, true)
376}
377
378// PutNoVerify is like Put but disables the verify check
379// when GODEBUG=goverifycache=1 is set.
380// It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
381// like test output containing times and the like.
382func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
383	return c.put(id, file, false)
384}
385
386func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
387	// Compute output ID.
388	h := sha256.New()
389	if _, err := file.Seek(0, 0); err != nil {
390		return OutputID{}, 0, err
391	}
392	size, err := io.Copy(h, file)
393	if err != nil {
394		return OutputID{}, 0, err
395	}
396	var out OutputID
397	h.Sum(out[:0])
398
399	// Copy to cached output file (if not already present).
400	if err := c.copyFile(file, out, size); err != nil {
401		return out, size, err
402	}
403
404	// Add to cache index.
405	return out, size, c.putIndexEntry(id, out, size, allowVerify)
406}
407
408// PutBytes stores the given bytes in the cache as the output for the action ID.
409func (c *Cache) PutBytes(id ActionID, data []byte) error {
410	_, _, err := c.Put(id, bytes.NewReader(data))
411	return err
412}
413
414// copyFile copies file into the cache, expecting it to have the given
415// output ID and size, if that file is not present already.
416func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
417	name := c.fileName(out, "d")
418	info, err := os.Stat(name)
419	if err == nil && info.Size() == size {
420		// Check hash.
421		if f, err := os.Open(name); err == nil {
422			h := sha256.New()
423			io.Copy(h, f)
424			f.Close()
425			var out2 OutputID
426			h.Sum(out2[:0])
427			if out == out2 {
428				return nil
429			}
430		}
431		// Hash did not match. Fall through and rewrite file.
432	}
433
434	// Copy file to cache directory.
435	mode := os.O_RDWR | os.O_CREATE
436	if err == nil && info.Size() > size { // shouldn't happen but fix in case
437		mode |= os.O_TRUNC
438	}
439	f, err := os.OpenFile(name, mode, 0666)
440	if err != nil {
441		return err
442	}
443	defer f.Close()
444	if size == 0 {
445		// File now exists with correct size.
446		// Only one possible zero-length file, so contents are OK too.
447		// Early return here makes sure there's a "last byte" for code below.
448		return nil
449	}
450
451	// From here on, if any of the I/O writing the file fails,
452	// we make a best-effort attempt to truncate the file f
453	// before returning, to avoid leaving bad bytes in the file.
454
455	// Copy file to f, but also into h to double-check hash.
456	if _, err := file.Seek(0, 0); err != nil {
457		f.Truncate(0)
458		return err
459	}
460	h := sha256.New()
461	w := io.MultiWriter(f, h)
462	if _, err := io.CopyN(w, file, size-1); err != nil {
463		f.Truncate(0)
464		return err
465	}
466	// Check last byte before writing it; writing it will make the size match
467	// what other processes expect to find and might cause them to start
468	// using the file.
469	buf := make([]byte, 1)
470	if _, err := file.Read(buf); err != nil {
471		f.Truncate(0)
472		return err
473	}
474	h.Write(buf)
475	sum := h.Sum(nil)
476	if !bytes.Equal(sum, out[:]) {
477		f.Truncate(0)
478		return fmt.Errorf("file content changed underfoot")
479	}
480
481	// Commit cache file entry.
482	if _, err := f.Write(buf); err != nil {
483		f.Truncate(0)
484		return err
485	}
486	if err := f.Close(); err != nil {
487		// Data might not have been written,
488		// but file may look like it is the right size.
489		// To be extra careful, remove cached file.
490		os.Remove(name)
491		return err
492	}
493	os.Chtimes(name, c.now(), c.now()) // mainly for tests
494
495	return nil
496}
497