1// Copyright 2019 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Package zip provides functions for creating and extracting module zip files. 6// 7// Module zip files have several restrictions listed below. These are necessary 8// to ensure that module zip files can be extracted consistently on supported 9// platforms and file systems. 10// 11// • All file paths within a zip file must start with "<module>@<version>/", 12// where "<module>" is the module path and "<version>" is the version. 13// The module path must be valid (see golang.org/x/mod/module.CheckPath). 14// The version must be valid and canonical (see 15// golang.org/x/mod/module.CanonicalVersion). The path must have a major 16// version suffix consistent with the version (see 17// golang.org/x/mod/module.Check). The part of the file path after the 18// "<module>@<version>/" prefix must be valid (see 19// golang.org/x/mod/module.CheckFilePath). 20// 21// • No two file paths may be equal under Unicode case-folding (see 22// strings.EqualFold). 23// 24// • A go.mod file may or may not appear in the top-level directory. If present, 25// it must be named "go.mod", not any other case. Files named "go.mod" 26// are not allowed in any other directory. 27// 28// • The total size in bytes of a module zip file may be at most MaxZipFile 29// bytes (500 MiB). The total uncompressed size of the files within the 30// zip may also be at most MaxZipFile bytes. 31// 32// • Each file's uncompressed size must match its declared 64-bit uncompressed 33// size in the zip file header. 34// 35// • If the zip contains files named "<module>@<version>/go.mod" or 36// "<module>@<version>/LICENSE", their sizes in bytes may be at most 37// MaxGoMod or MaxLICENSE, respectively (both are 16 MiB). 38// 39// • Empty directories are ignored. File permissions and timestamps are also 40// ignored. 41// 42// • Symbolic links and other irregular files are not allowed. 43// 44// Note that this package does not provide hashing functionality. See 45// golang.org/x/mod/sumdb/dirhash. 46package zip 47 48import ( 49 "archive/zip" 50 "bytes" 51 "fmt" 52 "io" 53 "io/ioutil" 54 "os" 55 "path" 56 "path/filepath" 57 "strings" 58 "unicode" 59 "unicode/utf8" 60 61 "golang.org/x/mod/module" 62) 63 64const ( 65 // MaxZipFile is the maximum size in bytes of a module zip file. The 66 // go command will report an error if either the zip file or its extracted 67 // content is larger than this. 68 MaxZipFile = 500 << 20 69 70 // MaxGoMod is the maximum size in bytes of a go.mod file within a 71 // module zip file. 72 MaxGoMod = 16 << 20 73 74 // MaxLICENSE is the maximum size in bytes of a LICENSE file within a 75 // module zip file. 76 MaxLICENSE = 16 << 20 77) 78 79// File provides an abstraction for a file in a directory, zip, or anything 80// else that looks like a file. 81type File interface { 82 // Path returns a clean slash-separated relative path from the module root 83 // directory to the file. 84 Path() string 85 86 // Lstat returns information about the file. If the file is a symbolic link, 87 // Lstat returns information about the link itself, not the file it points to. 88 Lstat() (os.FileInfo, error) 89 90 // Open provides access to the data within a regular file. Open may return 91 // an error if called on a directory or symbolic link. 92 Open() (io.ReadCloser, error) 93} 94 95// Create builds a zip archive for module m from an abstract list of files 96// and writes it to w. 97// 98// Create verifies the restrictions described in the package documentation 99// and should not produce an archive that Unzip cannot extract. Create does not 100// include files in the output archive if they don't belong in the module zip. 101// In particular, Create will not include files in modules found in 102// subdirectories, most files in vendor directories, or irregular files (such 103// as symbolic links) in the output archive. 104func Create(w io.Writer, m module.Version, files []File) (err error) { 105 defer func() { 106 if err != nil { 107 err = &zipError{verb: "create zip", err: err} 108 } 109 }() 110 111 // Check that the version is canonical, the module path is well-formed, and 112 // the major version suffix matches the major version. 113 if vers := module.CanonicalVersion(m.Version); vers != m.Version { 114 return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers) 115 } 116 if err := module.Check(m.Path, m.Version); err != nil { 117 return err 118 } 119 120 // Find directories containing go.mod files (other than the root). 121 // These directories will not be included in the output zip. 122 haveGoMod := make(map[string]bool) 123 for _, f := range files { 124 dir, base := path.Split(f.Path()) 125 if strings.EqualFold(base, "go.mod") { 126 info, err := f.Lstat() 127 if err != nil { 128 return err 129 } 130 if info.Mode().IsRegular() { 131 haveGoMod[dir] = true 132 } 133 } 134 } 135 136 inSubmodule := func(p string) bool { 137 for { 138 dir, _ := path.Split(p) 139 if dir == "" { 140 return false 141 } 142 if haveGoMod[dir] { 143 return true 144 } 145 p = dir[:len(dir)-1] 146 } 147 } 148 149 // Create the module zip file. 150 zw := zip.NewWriter(w) 151 prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version) 152 153 addFile := func(f File, path string, size int64) error { 154 rc, err := f.Open() 155 if err != nil { 156 return err 157 } 158 defer rc.Close() 159 w, err := zw.Create(prefix + path) 160 if err != nil { 161 return err 162 } 163 lr := &io.LimitedReader{R: rc, N: size + 1} 164 if _, err := io.Copy(w, lr); err != nil { 165 return err 166 } 167 if lr.N <= 0 { 168 return fmt.Errorf("file %q is larger than declared size", path) 169 } 170 return nil 171 } 172 173 collisions := make(collisionChecker) 174 maxSize := int64(MaxZipFile) 175 for _, f := range files { 176 p := f.Path() 177 if p != path.Clean(p) { 178 return fmt.Errorf("file path %s is not clean", p) 179 } 180 if path.IsAbs(p) { 181 return fmt.Errorf("file path %s is not relative", p) 182 } 183 if isVendoredPackage(p) || inSubmodule(p) { 184 continue 185 } 186 if p == ".hg_archival.txt" { 187 // Inserted by hg archive. 188 // The go command drops this regardless of the VCS being used. 189 continue 190 } 191 if err := module.CheckFilePath(p); err != nil { 192 return err 193 } 194 if strings.ToLower(p) == "go.mod" && p != "go.mod" { 195 return fmt.Errorf("found file named %s, want all lower-case go.mod", p) 196 } 197 info, err := f.Lstat() 198 if err != nil { 199 return err 200 } 201 if err := collisions.check(p, info.IsDir()); err != nil { 202 return err 203 } 204 if !info.Mode().IsRegular() { 205 // Skip symbolic links (golang.org/issue/27093). 206 continue 207 } 208 size := info.Size() 209 if size < 0 || maxSize < size { 210 return fmt.Errorf("module source tree too large (max size is %d bytes)", MaxZipFile) 211 } 212 maxSize -= size 213 if p == "go.mod" && size > MaxGoMod { 214 return fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod) 215 } 216 if p == "LICENSE" && size > MaxLICENSE { 217 return fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE) 218 } 219 220 if err := addFile(f, p, size); err != nil { 221 return err 222 } 223 } 224 225 return zw.Close() 226} 227 228// CreateFromDir creates a module zip file for module m from the contents of 229// a directory, dir. The zip content is written to w. 230// 231// CreateFromDir verifies the restrictions described in the package 232// documentation and should not produce an archive that Unzip cannot extract. 233// CreateFromDir does not include files in the output archive if they don't 234// belong in the module zip. In particular, CreateFromDir will not include 235// files in modules found in subdirectories, most files in vendor directories, 236// or irregular files (such as symbolic links) in the output archive. 237// Additionally, unlike Create, CreateFromDir will not include directories 238// named ".bzr", ".git", ".hg", or ".svn". 239func CreateFromDir(w io.Writer, m module.Version, dir string) (err error) { 240 defer func() { 241 if zerr, ok := err.(*zipError); ok { 242 zerr.path = dir 243 } else if err != nil { 244 err = &zipError{verb: "create zip", path: dir, err: err} 245 } 246 }() 247 248 var files []File 249 err = filepath.Walk(dir, func(filePath string, info os.FileInfo, err error) error { 250 if err != nil { 251 return err 252 } 253 relPath, err := filepath.Rel(dir, filePath) 254 if err != nil { 255 return err 256 } 257 slashPath := filepath.ToSlash(relPath) 258 259 if info.IsDir() { 260 if filePath == dir { 261 // Don't skip the top-level directory. 262 return nil 263 } 264 265 // Skip VCS directories. 266 // fossil repos are regular files with arbitrary names, so we don't try 267 // to exclude them. 268 switch filepath.Base(filePath) { 269 case ".bzr", ".git", ".hg", ".svn": 270 return filepath.SkipDir 271 } 272 273 // Skip some subdirectories inside vendor, but maintain bug 274 // golang.org/issue/31562, described in isVendoredPackage. 275 // We would like Create and CreateFromDir to produce the same result 276 // for a set of files, whether expressed as a directory tree or zip. 277 if isVendoredPackage(slashPath) { 278 return filepath.SkipDir 279 } 280 281 // Skip submodules (directories containing go.mod files). 282 if goModInfo, err := os.Lstat(filepath.Join(filePath, "go.mod")); err == nil && !goModInfo.IsDir() { 283 return filepath.SkipDir 284 } 285 return nil 286 } 287 288 if info.Mode().IsRegular() { 289 if !isVendoredPackage(slashPath) { 290 files = append(files, dirFile{ 291 filePath: filePath, 292 slashPath: slashPath, 293 info: info, 294 }) 295 } 296 return nil 297 } 298 299 // Not a regular file or a directory. Probably a symbolic link. 300 // Irregular files are ignored, so skip it. 301 return nil 302 }) 303 if err != nil { 304 return err 305 } 306 307 return Create(w, m, files) 308} 309 310type dirFile struct { 311 filePath, slashPath string 312 info os.FileInfo 313} 314 315func (f dirFile) Path() string { return f.slashPath } 316func (f dirFile) Lstat() (os.FileInfo, error) { return f.info, nil } 317func (f dirFile) Open() (io.ReadCloser, error) { return os.Open(f.filePath) } 318 319// isVendoredPackage attempts to report whether the given filename is contained 320// in a package whose import path contains (but does not end with) the component 321// "vendor". 322// 323// Unfortunately, isVendoredPackage reports false positives for files in any 324// non-top-level package whose import path ends in "vendor". 325func isVendoredPackage(name string) bool { 326 var i int 327 if strings.HasPrefix(name, "vendor/") { 328 i += len("vendor/") 329 } else if j := strings.Index(name, "/vendor/"); j >= 0 { 330 // This offset looks incorrect; this should probably be 331 // 332 // i = j + len("/vendor/") 333 // 334 // (See https://golang.org/issue/31562 and https://golang.org/issue/37397.) 335 // Unfortunately, we can't fix it without invalidating module checksums. 336 i += len("/vendor/") 337 } else { 338 return false 339 } 340 return strings.Contains(name[i:], "/") 341} 342 343// Unzip extracts the contents of a module zip file to a directory. 344// 345// Unzip checks all restrictions listed in the package documentation and returns 346// an error if the zip archive is not valid. In some cases, files may be written 347// to dir before an error is returned (for example, if a file's uncompressed 348// size does not match its declared size). 349// 350// dir may or may not exist: Unzip will create it and any missing parent 351// directories if it doesn't exist. If dir exists, it must be empty. 352func Unzip(dir string, m module.Version, zipFile string) (err error) { 353 defer func() { 354 if err != nil { 355 err = &zipError{verb: "unzip", path: zipFile, err: err} 356 } 357 }() 358 359 if vers := module.CanonicalVersion(m.Version); vers != m.Version { 360 return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers) 361 } 362 if err := module.Check(m.Path, m.Version); err != nil { 363 return err 364 } 365 366 // Check that the directory is empty. Don't create it yet in case there's 367 // an error reading the zip. 368 files, _ := ioutil.ReadDir(dir) 369 if len(files) > 0 { 370 return fmt.Errorf("target directory %v exists and is not empty", dir) 371 } 372 373 // Open the zip file and ensure it's under the size limit. 374 f, err := os.Open(zipFile) 375 if err != nil { 376 return err 377 } 378 defer f.Close() 379 info, err := f.Stat() 380 if err != nil { 381 return err 382 } 383 zipSize := info.Size() 384 if zipSize > MaxZipFile { 385 return fmt.Errorf("module zip file is too large (%d bytes; limit is %d bytes)", zipSize, MaxZipFile) 386 } 387 388 z, err := zip.NewReader(f, zipSize) 389 if err != nil { 390 return err 391 } 392 393 // Check total size, valid file names. 394 collisions := make(collisionChecker) 395 prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version) 396 var size int64 397 for _, zf := range z.File { 398 if !strings.HasPrefix(zf.Name, prefix) { 399 return fmt.Errorf("unexpected file name %s", zf.Name) 400 } 401 name := zf.Name[len(prefix):] 402 if name == "" { 403 continue 404 } 405 isDir := strings.HasSuffix(name, "/") 406 if isDir { 407 name = name[:len(name)-1] 408 } 409 if path.Clean(name) != name { 410 return fmt.Errorf("invalid file name %s", zf.Name) 411 } 412 if err := module.CheckFilePath(name); err != nil { 413 return err 414 } 415 if err := collisions.check(name, isDir); err != nil { 416 return err 417 } 418 if isDir { 419 continue 420 } 421 if base := path.Base(name); strings.EqualFold(base, "go.mod") { 422 if base != name { 423 return fmt.Errorf("found go.mod file not in module root directory (%s)", zf.Name) 424 } else if name != "go.mod" { 425 return fmt.Errorf("found file named %s, want all lower-case go.mod", zf.Name) 426 } 427 } 428 s := int64(zf.UncompressedSize64) 429 if s < 0 || MaxZipFile-size < s { 430 return fmt.Errorf("total uncompressed size of module contents too large (max size is %d bytes)", MaxZipFile) 431 } 432 size += s 433 if name == "go.mod" && s > MaxGoMod { 434 return fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod) 435 } 436 if name == "LICENSE" && s > MaxLICENSE { 437 return fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE) 438 } 439 } 440 441 // Unzip, enforcing sizes checked earlier. 442 if err := os.MkdirAll(dir, 0777); err != nil { 443 return err 444 } 445 for _, zf := range z.File { 446 name := zf.Name[len(prefix):] 447 if name == "" || strings.HasSuffix(name, "/") { 448 continue 449 } 450 dst := filepath.Join(dir, name) 451 if err := os.MkdirAll(filepath.Dir(dst), 0777); err != nil { 452 return err 453 } 454 w, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0444) 455 if err != nil { 456 return err 457 } 458 r, err := zf.Open() 459 if err != nil { 460 w.Close() 461 return err 462 } 463 lr := &io.LimitedReader{R: r, N: int64(zf.UncompressedSize64) + 1} 464 _, err = io.Copy(w, lr) 465 r.Close() 466 if err != nil { 467 w.Close() 468 return err 469 } 470 if err := w.Close(); err != nil { 471 return err 472 } 473 if lr.N <= 0 { 474 return fmt.Errorf("uncompressed size of file %s is larger than declared size (%d bytes)", zf.Name, zf.UncompressedSize64) 475 } 476 } 477 478 return nil 479} 480 481// collisionChecker finds case-insensitive name collisions and paths that 482// are listed as both files and directories. 483// 484// The keys of this map are processed with strToFold. pathInfo has the original 485// path for each folded path. 486type collisionChecker map[string]pathInfo 487 488type pathInfo struct { 489 path string 490 isDir bool 491} 492 493func (cc collisionChecker) check(p string, isDir bool) error { 494 fold := strToFold(p) 495 if other, ok := cc[fold]; ok { 496 if p != other.path { 497 return fmt.Errorf("case-insensitive file name collision: %q and %q", other.path, p) 498 } 499 if isDir != other.isDir { 500 return fmt.Errorf("entry %q is both a file and a directory", p) 501 } 502 if !isDir { 503 return fmt.Errorf("multiple entries for file %q", p) 504 } 505 // It's not an error if check is called with the same directory multiple 506 // times. check is called recursively on parent directories, so check 507 // may be called on the same directory many times. 508 } else { 509 cc[fold] = pathInfo{path: p, isDir: isDir} 510 } 511 512 if parent := path.Dir(p); parent != "." { 513 return cc.check(parent, true) 514 } 515 return nil 516} 517 518type zipError struct { 519 verb, path string 520 err error 521} 522 523func (e *zipError) Error() string { 524 if e.path == "" { 525 return fmt.Sprintf("%s: %v", e.verb, e.err) 526 } else { 527 return fmt.Sprintf("%s %s: %v", e.verb, e.path, e.err) 528 } 529} 530 531func (e *zipError) Unwrap() error { 532 return e.err 533} 534 535// strToFold returns a string with the property that 536// strings.EqualFold(s, t) iff strToFold(s) == strToFold(t) 537// This lets us test a large set of strings for fold-equivalent 538// duplicates without making a quadratic number of calls 539// to EqualFold. Note that strings.ToUpper and strings.ToLower 540// do not have the desired property in some corner cases. 541func strToFold(s string) string { 542 // Fast path: all ASCII, no upper case. 543 // Most paths look like this already. 544 for i := 0; i < len(s); i++ { 545 c := s[i] 546 if c >= utf8.RuneSelf || 'A' <= c && c <= 'Z' { 547 goto Slow 548 } 549 } 550 return s 551 552Slow: 553 var buf bytes.Buffer 554 for _, r := range s { 555 // SimpleFold(x) cycles to the next equivalent rune > x 556 // or wraps around to smaller values. Iterate until it wraps, 557 // and we've found the minimum value. 558 for { 559 r0 := r 560 r = unicode.SimpleFold(r0) 561 if r <= r0 { 562 break 563 } 564 } 565 // Exception to allow fast path above: A-Z => a-z 566 if 'A' <= r && r <= 'Z' { 567 r += 'a' - 'A' 568 } 569 buf.WriteRune(r) 570 } 571 return buf.String() 572} 573