1// Copyright 2017 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Package cache implements a build artifact cache. 6// 7// This package is a slightly modified fork of Go's 8// cmd/go/internal/cache package. 9package cache 10 11import ( 12 "bytes" 13 "crypto/sha256" 14 "encoding/hex" 15 "errors" 16 "fmt" 17 "io" 18 "io/ioutil" 19 "os" 20 "path/filepath" 21 "strconv" 22 "strings" 23 "time" 24 25 "honnef.co/go/tools/internal/renameio" 26) 27 28// An ActionID is a cache action key, the hash of a complete description of a 29// repeatable computation (command line, environment variables, 30// input file contents, executable contents). 31type ActionID [HashSize]byte 32 33// An OutputID is a cache output key, the hash of an output of a computation. 34type OutputID [HashSize]byte 35 36// A Cache is a package cache, backed by a file system directory tree. 37type Cache struct { 38 dir string 39 now func() time.Time 40} 41 42// Open opens and returns the cache in the given directory. 43// 44// It is safe for multiple processes on a single machine to use the 45// same cache directory in a local file system simultaneously. 46// They will coordinate using operating system file locks and may 47// duplicate effort but will not corrupt the cache. 48// 49// However, it is NOT safe for multiple processes on different machines 50// to share a cache directory (for example, if the directory were stored 51// in a network file system). File locking is notoriously unreliable in 52// network file systems and may not suffice to protect the cache. 53// 54func Open(dir string) (*Cache, error) { 55 info, err := os.Stat(dir) 56 if err != nil { 57 return nil, err 58 } 59 if !info.IsDir() { 60 return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")} 61 } 62 for i := 0; i < 256; i++ { 63 name := filepath.Join(dir, fmt.Sprintf("%02x", i)) 64 if err := os.MkdirAll(name, 0777); err != nil { 65 return nil, err 66 } 67 } 68 c := &Cache{ 69 dir: dir, 70 now: time.Now, 71 } 72 return c, nil 73} 74 75// fileName returns the name of the file corresponding to the given id. 76func (c *Cache) fileName(id [HashSize]byte, key string) string { 77 return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key) 78} 79 80var errMissing = errors.New("cache entry not found") 81 82const ( 83 // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n" 84 hexSize = HashSize * 2 85 entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1 86) 87 88// verify controls whether to run the cache in verify mode. 89// In verify mode, the cache always returns errMissing from Get 90// but then double-checks in Put that the data being written 91// exactly matches any existing entry. This provides an easy 92// way to detect program behavior that would have been different 93// had the cache entry been returned from Get. 94// 95// verify is enabled by setting the environment variable 96// GODEBUG=gocacheverify=1. 97var verify = false 98 99// DebugTest is set when GODEBUG=gocachetest=1 is in the environment. 100var DebugTest = false 101 102func init() { initEnv() } 103 104func initEnv() { 105 verify = false 106 debugHash = false 107 debug := strings.Split(os.Getenv("GODEBUG"), ",") 108 for _, f := range debug { 109 if f == "gocacheverify=1" { 110 verify = true 111 } 112 if f == "gocachehash=1" { 113 debugHash = true 114 } 115 if f == "gocachetest=1" { 116 DebugTest = true 117 } 118 } 119} 120 121// Get looks up the action ID in the cache, 122// returning the corresponding output ID and file size, if any. 123// Note that finding an output ID does not guarantee that the 124// saved file for that output ID is still available. 125func (c *Cache) Get(id ActionID) (Entry, error) { 126 if verify { 127 return Entry{}, errMissing 128 } 129 return c.get(id) 130} 131 132type Entry struct { 133 OutputID OutputID 134 Size int64 135 Time time.Time 136} 137 138// get is Get but does not respect verify mode, so that Put can use it. 139func (c *Cache) get(id ActionID) (Entry, error) { 140 missing := func() (Entry, error) { 141 return Entry{}, errMissing 142 } 143 f, err := os.Open(c.fileName(id, "a")) 144 if err != nil { 145 return missing() 146 } 147 defer f.Close() 148 entry := make([]byte, entrySize+1) // +1 to detect whether f is too long 149 if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF { 150 return missing() 151 } 152 if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' { 153 return missing() 154 } 155 eid, entry := entry[3:3+hexSize], entry[3+hexSize:] 156 eout, entry := entry[1:1+hexSize], entry[1+hexSize:] 157 esize, entry := entry[1:1+20], entry[1+20:] 158 //lint:ignore SA4006 See https://github.com/dominikh/go-tools/issues/465 159 etime, entry := entry[1:1+20], entry[1+20:] 160 var buf [HashSize]byte 161 if _, err := hex.Decode(buf[:], eid); err != nil || buf != id { 162 return missing() 163 } 164 if _, err := hex.Decode(buf[:], eout); err != nil { 165 return missing() 166 } 167 i := 0 168 for i < len(esize) && esize[i] == ' ' { 169 i++ 170 } 171 size, err := strconv.ParseInt(string(esize[i:]), 10, 64) 172 if err != nil || size < 0 { 173 return missing() 174 } 175 i = 0 176 for i < len(etime) && etime[i] == ' ' { 177 i++ 178 } 179 tm, err := strconv.ParseInt(string(etime[i:]), 10, 64) 180 if err != nil || tm < 0 { 181 return missing() 182 } 183 184 c.used(c.fileName(id, "a")) 185 186 return Entry{buf, size, time.Unix(0, tm)}, nil 187} 188 189// GetFile looks up the action ID in the cache and returns 190// the name of the corresponding data file. 191func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) { 192 entry, err = c.Get(id) 193 if err != nil { 194 return "", Entry{}, err 195 } 196 file = c.OutputFile(entry.OutputID) 197 info, err := os.Stat(file) 198 if err != nil || info.Size() != entry.Size { 199 return "", Entry{}, errMissing 200 } 201 return file, entry, nil 202} 203 204// GetBytes looks up the action ID in the cache and returns 205// the corresponding output bytes. 206// GetBytes should only be used for data that can be expected to fit in memory. 207func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) { 208 entry, err := c.Get(id) 209 if err != nil { 210 return nil, entry, err 211 } 212 data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID)) 213 if sha256.Sum256(data) != entry.OutputID { 214 return nil, entry, errMissing 215 } 216 return data, entry, nil 217} 218 219// OutputFile returns the name of the cache file storing output with the given OutputID. 220func (c *Cache) OutputFile(out OutputID) string { 221 file := c.fileName(out, "d") 222 c.used(file) 223 return file 224} 225 226// Time constants for cache expiration. 227// 228// We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour), 229// to avoid causing many unnecessary inode updates. The mtimes therefore 230// roughly reflect "time of last use" but may in fact be older by at most an hour. 231// 232// We scan the cache for entries to delete at most once per trimInterval (1 day). 233// 234// When we do scan the cache, we delete entries that have not been used for 235// at least trimLimit (5 days). Statistics gathered from a month of usage by 236// Go developers found that essentially all reuse of cached entries happened 237// within 5 days of the previous reuse. See golang.org/issue/22990. 238const ( 239 mtimeInterval = 1 * time.Hour 240 trimInterval = 24 * time.Hour 241 trimLimit = 5 * 24 * time.Hour 242) 243 244// used makes a best-effort attempt to update mtime on file, 245// so that mtime reflects cache access time. 246// 247// Because the reflection only needs to be approximate, 248// and to reduce the amount of disk activity caused by using 249// cache entries, used only updates the mtime if the current 250// mtime is more than an hour old. This heuristic eliminates 251// nearly all of the mtime updates that would otherwise happen, 252// while still keeping the mtimes useful for cache trimming. 253func (c *Cache) used(file string) { 254 info, err := os.Stat(file) 255 if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval { 256 return 257 } 258 os.Chtimes(file, c.now(), c.now()) 259} 260 261// Trim removes old cache entries that are likely not to be reused. 262func (c *Cache) Trim() { 263 now := c.now() 264 265 // We maintain in dir/trim.txt the time of the last completed cache trim. 266 // If the cache has been trimmed recently enough, do nothing. 267 // This is the common case. 268 data, _ := renameio.ReadFile(filepath.Join(c.dir, "trim.txt")) 269 t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64) 270 if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval { 271 return 272 } 273 274 // Trim each of the 256 subdirectories. 275 // We subtract an additional mtimeInterval 276 // to account for the imprecision of our "last used" mtimes. 277 cutoff := now.Add(-trimLimit - mtimeInterval) 278 for i := 0; i < 256; i++ { 279 subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i)) 280 c.trimSubdir(subdir, cutoff) 281 } 282 283 // Ignore errors from here: if we don't write the complete timestamp, the 284 // cache will appear older than it is, and we'll trim it again next time. 285 renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666) 286} 287 288// trimSubdir trims a single cache subdirectory. 289func (c *Cache) trimSubdir(subdir string, cutoff time.Time) { 290 // Read all directory entries from subdir before removing 291 // any files, in case removing files invalidates the file offset 292 // in the directory scan. Also, ignore error from f.Readdirnames, 293 // because we don't care about reporting the error and we still 294 // want to process any entries found before the error. 295 f, err := os.Open(subdir) 296 if err != nil { 297 return 298 } 299 names, _ := f.Readdirnames(-1) 300 f.Close() 301 302 for _, name := range names { 303 // Remove only cache entries (xxxx-a and xxxx-d). 304 if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") { 305 continue 306 } 307 entry := filepath.Join(subdir, name) 308 info, err := os.Stat(entry) 309 if err == nil && info.ModTime().Before(cutoff) { 310 os.Remove(entry) 311 } 312 } 313} 314 315// putIndexEntry adds an entry to the cache recording that executing the action 316// with the given id produces an output with the given output id (hash) and size. 317func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error { 318 // Note: We expect that for one reason or another it may happen 319 // that repeating an action produces a different output hash 320 // (for example, if the output contains a time stamp or temp dir name). 321 // While not ideal, this is also not a correctness problem, so we 322 // don't make a big deal about it. In particular, we leave the action 323 // cache entries writable specifically so that they can be overwritten. 324 // 325 // Setting GODEBUG=gocacheverify=1 does make a big deal: 326 // in verify mode we are double-checking that the cache entries 327 // are entirely reproducible. As just noted, this may be unrealistic 328 // in some cases but the check is also useful for shaking out real bugs. 329 entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()) 330 331 if verify && allowVerify { 332 old, err := c.get(id) 333 if err == nil && (old.OutputID != out || old.Size != size) { 334 // panic to show stack trace, so we can see what code is generating this cache entry. 335 msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size) 336 panic(msg) 337 } 338 } 339 file := c.fileName(id, "a") 340 341 // Copy file to cache directory. 342 mode := os.O_WRONLY | os.O_CREATE 343 f, err := os.OpenFile(file, mode, 0666) 344 if err != nil { 345 return err 346 } 347 _, err = f.WriteString(entry) 348 if err == nil { 349 // Truncate the file only *after* writing it. 350 // (This should be a no-op, but truncate just in case of previous corruption.) 351 // 352 // This differs from ioutil.WriteFile, which truncates to 0 *before* writing 353 // via os.O_TRUNC. Truncating only after writing ensures that a second write 354 // of the same content to the same file is idempotent, and does not — even 355 // temporarily! — undo the effect of the first write. 356 err = f.Truncate(int64(len(entry))) 357 } 358 if closeErr := f.Close(); err == nil { 359 err = closeErr 360 } 361 if err != nil { 362 // TODO(bcmills): This Remove potentially races with another go command writing to file. 363 // Can we eliminate it? 364 os.Remove(file) 365 return err 366 } 367 os.Chtimes(file, c.now(), c.now()) // mainly for tests 368 369 return nil 370} 371 372// Put stores the given output in the cache as the output for the action ID. 373// It may read file twice. The content of file must not change between the two passes. 374func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 375 return c.put(id, file, true) 376} 377 378// PutNoVerify is like Put but disables the verify check 379// when GODEBUG=goverifycache=1 is set. 380// It is meant for data that is OK to cache but that we expect to vary slightly from run to run, 381// like test output containing times and the like. 382func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 383 return c.put(id, file, false) 384} 385 386func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) { 387 // Compute output ID. 388 h := sha256.New() 389 if _, err := file.Seek(0, 0); err != nil { 390 return OutputID{}, 0, err 391 } 392 size, err := io.Copy(h, file) 393 if err != nil { 394 return OutputID{}, 0, err 395 } 396 var out OutputID 397 h.Sum(out[:0]) 398 399 // Copy to cached output file (if not already present). 400 if err := c.copyFile(file, out, size); err != nil { 401 return out, size, err 402 } 403 404 // Add to cache index. 405 return out, size, c.putIndexEntry(id, out, size, allowVerify) 406} 407 408// PutBytes stores the given bytes in the cache as the output for the action ID. 409func (c *Cache) PutBytes(id ActionID, data []byte) error { 410 _, _, err := c.Put(id, bytes.NewReader(data)) 411 return err 412} 413 414// copyFile copies file into the cache, expecting it to have the given 415// output ID and size, if that file is not present already. 416func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error { 417 name := c.fileName(out, "d") 418 info, err := os.Stat(name) 419 if err == nil && info.Size() == size { 420 // Check hash. 421 if f, err := os.Open(name); err == nil { 422 h := sha256.New() 423 io.Copy(h, f) 424 f.Close() 425 var out2 OutputID 426 h.Sum(out2[:0]) 427 if out == out2 { 428 return nil 429 } 430 } 431 // Hash did not match. Fall through and rewrite file. 432 } 433 434 // Copy file to cache directory. 435 mode := os.O_RDWR | os.O_CREATE 436 if err == nil && info.Size() > size { // shouldn't happen but fix in case 437 mode |= os.O_TRUNC 438 } 439 f, err := os.OpenFile(name, mode, 0666) 440 if err != nil { 441 return err 442 } 443 defer f.Close() 444 if size == 0 { 445 // File now exists with correct size. 446 // Only one possible zero-length file, so contents are OK too. 447 // Early return here makes sure there's a "last byte" for code below. 448 return nil 449 } 450 451 // From here on, if any of the I/O writing the file fails, 452 // we make a best-effort attempt to truncate the file f 453 // before returning, to avoid leaving bad bytes in the file. 454 455 // Copy file to f, but also into h to double-check hash. 456 if _, err := file.Seek(0, 0); err != nil { 457 f.Truncate(0) 458 return err 459 } 460 h := sha256.New() 461 w := io.MultiWriter(f, h) 462 if _, err := io.CopyN(w, file, size-1); err != nil { 463 f.Truncate(0) 464 return err 465 } 466 // Check last byte before writing it; writing it will make the size match 467 // what other processes expect to find and might cause them to start 468 // using the file. 469 buf := make([]byte, 1) 470 if _, err := file.Read(buf); err != nil { 471 f.Truncate(0) 472 return err 473 } 474 h.Write(buf) 475 sum := h.Sum(nil) 476 if !bytes.Equal(sum, out[:]) { 477 f.Truncate(0) 478 return fmt.Errorf("file content changed underfoot") 479 } 480 481 // Commit cache file entry. 482 if _, err := f.Write(buf); err != nil { 483 f.Truncate(0) 484 return err 485 } 486 if err := f.Close(); err != nil { 487 // Data might not have been written, 488 // but file may look like it is the right size. 489 // To be extra careful, remove cached file. 490 os.Remove(name) 491 return err 492 } 493 os.Chtimes(name, c.now(), c.now()) // mainly for tests 494 495 return nil 496} 497