1// Copyright 2019 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Package zip provides functions for creating and extracting module zip files. 6// 7// Module zip files have several restrictions listed below. These are necessary 8// to ensure that module zip files can be extracted consistently on supported 9// platforms and file systems. 10// 11// • All file paths within a zip file must start with "<module>@<version>/", 12// where "<module>" is the module path and "<version>" is the version. 13// The module path must be valid (see golang.org/x/mod/module.CheckPath). 14// The version must be valid and canonical (see 15// golang.org/x/mod/module.CanonicalVersion). The path must have a major 16// version suffix consistent with the version (see 17// golang.org/x/mod/module.Check). The part of the file path after the 18// "<module>@<version>/" prefix must be valid (see 19// golang.org/x/mod/module.CheckFilePath). 20// 21// • No two file paths may be equal under Unicode case-folding (see 22// strings.EqualFold). 23// 24// • A go.mod file may or may not appear in the top-level directory. If present, 25// it must be named "go.mod", not any other case. Files named "go.mod" 26// are not allowed in any other directory. 27// 28// • The total size in bytes of a module zip file may be at most MaxZipFile 29// bytes (500 MiB). The total uncompressed size of the files within the 30// zip may also be at most MaxZipFile bytes. 31// 32// • Each file's uncompressed size must match its declared 64-bit uncompressed 33// size in the zip file header. 34// 35// • If the zip contains files named "<module>@<version>/go.mod" or 36// "<module>@<version>/LICENSE", their sizes in bytes may be at most 37// MaxGoMod or MaxLICENSE, respectively (both are 16 MiB). 38// 39// • Empty directories are ignored. File permissions and timestamps are also 40// ignored. 41// 42// • Symbolic links and other irregular files are not allowed. 43// 44// Note that this package does not provide hashing functionality. See 45// golang.org/x/mod/sumdb/dirhash. 46package zip 47 48import ( 49 "archive/zip" 50 "bytes" 51 "fmt" 52 "io" 53 "io/ioutil" 54 "os" 55 "path" 56 "path/filepath" 57 "strings" 58 "unicode" 59 "unicode/utf8" 60 61 "golang.org/x/mod/module" 62) 63 64const ( 65 // MaxZipFile is the maximum size in bytes of a module zip file. The 66 // go command will report an error if either the zip file or its extracted 67 // content is larger than this. 68 MaxZipFile = 500 << 20 69 70 // MaxGoMod is the maximum size in bytes of a go.mod file within a 71 // module zip file. 72 MaxGoMod = 16 << 20 73 74 // MaxLICENSE is the maximum size in bytes of a LICENSE file within a 75 // module zip file. 76 MaxLICENSE = 16 << 20 77) 78 79// File provides an abstraction for a file in a directory, zip, or anything 80// else that looks like a file. 81type File interface { 82 // Path returns a clean slash-separated relative path from the module root 83 // directory to the file. 84 Path() string 85 86 // Lstat returns information about the file. If the file is a symbolic link, 87 // Lstat returns information about the link itself, not the file it points to. 88 Lstat() (os.FileInfo, error) 89 90 // Open provides access to the data within a regular file. Open may return 91 // an error if called on a directory or symbolic link. 92 Open() (io.ReadCloser, error) 93} 94 95// Create builds a zip archive for module m from an abstract list of files 96// and writes it to w. 97// 98// Create verifies the restrictions described in the package documentation 99// and should not produce an archive that Unzip cannot extract. Create does not 100// include files in the output archive if they don't belong in the module zip. 101// In particular, Create will not include files in modules found in 102// subdirectories, most files in vendor directories, or irregular files (such 103// as symbolic links) in the output archive. 104func Create(w io.Writer, m module.Version, files []File) (err error) { 105 defer func() { 106 if err != nil { 107 err = &zipError{verb: "create zip", err: err} 108 } 109 }() 110 111 // Check that the version is canonical, the module path is well-formed, and 112 // the major version suffix matches the major version. 113 if vers := module.CanonicalVersion(m.Version); vers != m.Version { 114 return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers) 115 } 116 if err := module.Check(m.Path, m.Version); err != nil { 117 return err 118 } 119 120 // Find directories containing go.mod files (other than the root). 121 // These directories will not be included in the output zip. 122 haveGoMod := make(map[string]bool) 123 for _, f := range files { 124 dir, base := path.Split(f.Path()) 125 if strings.EqualFold(base, "go.mod") { 126 info, err := f.Lstat() 127 if err != nil { 128 return err 129 } 130 if info.Mode().IsRegular() { 131 haveGoMod[dir] = true 132 } 133 } 134 } 135 136 inSubmodule := func(p string) bool { 137 for { 138 dir, _ := path.Split(p) 139 if dir == "" { 140 return false 141 } 142 if haveGoMod[dir] { 143 return true 144 } 145 p = dir[:len(dir)-1] 146 } 147 } 148 149 // Create the module zip file. 150 zw := zip.NewWriter(w) 151 prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version) 152 153 addFile := func(f File, path string, size int64) error { 154 rc, err := f.Open() 155 if err != nil { 156 return err 157 } 158 defer rc.Close() 159 w, err := zw.Create(prefix + path) 160 if err != nil { 161 return err 162 } 163 lr := &io.LimitedReader{R: rc, N: size + 1} 164 if _, err := io.Copy(w, lr); err != nil { 165 return err 166 } 167 if lr.N <= 0 { 168 return fmt.Errorf("file %q is larger than declared size", path) 169 } 170 return nil 171 } 172 173 collisions := make(collisionChecker) 174 maxSize := int64(MaxZipFile) 175 for _, f := range files { 176 p := f.Path() 177 if p != path.Clean(p) { 178 return fmt.Errorf("file path %s is not clean", p) 179 } 180 if path.IsAbs(p) { 181 return fmt.Errorf("file path %s is not relative", p) 182 } 183 if isVendoredPackage(p) || inSubmodule(p) { 184 continue 185 } 186 if p == ".hg_archival.txt" { 187 // Inserted by hg archive. 188 // The go command drops this regardless of the VCS being used. 189 continue 190 } 191 if err := module.CheckFilePath(p); err != nil { 192 return err 193 } 194 if strings.ToLower(p) == "go.mod" && p != "go.mod" { 195 return fmt.Errorf("found file named %s, want all lower-case go.mod", p) 196 } 197 info, err := f.Lstat() 198 if err != nil { 199 return err 200 } 201 if err := collisions.check(p, info.IsDir()); err != nil { 202 return err 203 } 204 if !info.Mode().IsRegular() { 205 // Skip symbolic links (golang.org/issue/27093). 206 continue 207 } 208 size := info.Size() 209 if size < 0 || maxSize < size { 210 return fmt.Errorf("module source tree too large (max size is %d bytes)", MaxZipFile) 211 } 212 maxSize -= size 213 if p == "go.mod" && size > MaxGoMod { 214 return fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod) 215 } 216 if p == "LICENSE" && size > MaxLICENSE { 217 return fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE) 218 } 219 220 if err := addFile(f, p, size); err != nil { 221 return err 222 } 223 } 224 225 return zw.Close() 226} 227 228// CreateFromDir creates a module zip file for module m from the contents of 229// a directory, dir. The zip content is written to w. 230// 231// CreateFromDir verifies the restrictions described in the package 232// documentation and should not produce an archive that Unzip cannot extract. 233// CreateFromDir does not include files in the output archive if they don't 234// belong in the module zip. In particular, CreateFromDir will not include 235// files in modules found in subdirectories, most files in vendor directories, 236// or irregular files (such as symbolic links) in the output archive. 237// Additionally, unlike Create, CreateFromDir will not include directories 238// named ".bzr", ".git", ".hg", or ".svn". 239func CreateFromDir(w io.Writer, m module.Version, dir string) (err error) { 240 defer func() { 241 if zerr, ok := err.(*zipError); ok { 242 zerr.path = dir 243 } else if err != nil { 244 err = &zipError{verb: "create zip", path: dir, err: err} 245 } 246 }() 247 248 var files []File 249 err = filepath.Walk(dir, func(filePath string, info os.FileInfo, err error) error { 250 relPath, err := filepath.Rel(dir, filePath) 251 if err != nil { 252 return err 253 } 254 slashPath := filepath.ToSlash(relPath) 255 256 if info.IsDir() { 257 if filePath == dir { 258 // Don't skip the top-level directory. 259 return nil 260 } 261 262 // Skip VCS directories. 263 // fossil repos are regular files with arbitrary names, so we don't try 264 // to exclude them. 265 switch filepath.Base(filePath) { 266 case ".bzr", ".git", ".hg", ".svn": 267 return filepath.SkipDir 268 } 269 270 // Skip some subdirectories inside vendor, but maintain bug 271 // golang.org/issue/31562, described in isVendoredPackage. 272 // We would like Create and CreateFromDir to produce the same result 273 // for a set of files, whether expressed as a directory tree or zip. 274 if isVendoredPackage(slashPath) { 275 return filepath.SkipDir 276 } 277 278 // Skip submodules (directories containing go.mod files). 279 if goModInfo, err := os.Lstat(filepath.Join(filePath, "go.mod")); err == nil && !goModInfo.IsDir() { 280 return filepath.SkipDir 281 } 282 return nil 283 } 284 285 if info.Mode().IsRegular() { 286 if !isVendoredPackage(slashPath) { 287 files = append(files, dirFile{ 288 filePath: filePath, 289 slashPath: slashPath, 290 info: info, 291 }) 292 } 293 return nil 294 } 295 296 // Not a regular file or a directory. Probably a symbolic link. 297 // Irregular files are ignored, so skip it. 298 return nil 299 }) 300 if err != nil { 301 return err 302 } 303 304 return Create(w, m, files) 305} 306 307type dirFile struct { 308 filePath, slashPath string 309 info os.FileInfo 310} 311 312func (f dirFile) Path() string { return f.slashPath } 313func (f dirFile) Lstat() (os.FileInfo, error) { return f.info, nil } 314func (f dirFile) Open() (io.ReadCloser, error) { return os.Open(f.filePath) } 315 316func isVendoredPackage(name string) bool { 317 var i int 318 if strings.HasPrefix(name, "vendor/") { 319 i += len("vendor/") 320 } else if j := strings.Index(name, "/vendor/"); j >= 0 { 321 // This offset looks incorrect; this should probably be 322 // 323 // i = j + len("/vendor/") 324 // 325 // (See https://golang.org/issue/31562.) 326 // 327 // Unfortunately, we can't fix it without invalidating checksums. 328 // Fortunately, the error appears to be strictly conservative: we'll retain 329 // vendored packages that we should have pruned, but we won't prune 330 // non-vendored packages that we should have retained. 331 // 332 // Since this defect doesn't seem to break anything, it's not worth fixing 333 // for now. 334 i += len("/vendor/") 335 } else { 336 return false 337 } 338 return strings.Contains(name[i:], "/") 339} 340 341// Unzip extracts the contents of a module zip file to a directory. 342// 343// Unzip checks all restrictions listed in the package documentation and returns 344// an error if the zip archive is not valid. In some cases, files may be written 345// to dir before an error is returned (for example, if a file's uncompressed 346// size does not match its declared size). 347// 348// dir may or may not exist: Unzip will create it and any missing parent 349// directories if it doesn't exist. If dir exists, it must be empty. 350func Unzip(dir string, m module.Version, zipFile string) (err error) { 351 defer func() { 352 if err != nil { 353 err = &zipError{verb: "unzip", path: zipFile, err: err} 354 } 355 }() 356 357 if vers := module.CanonicalVersion(m.Version); vers != m.Version { 358 return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers) 359 } 360 if err := module.Check(m.Path, m.Version); err != nil { 361 return err 362 } 363 364 // Check that the directory is empty. Don't create it yet in case there's 365 // an error reading the zip. 366 files, _ := ioutil.ReadDir(dir) 367 if len(files) > 0 { 368 return fmt.Errorf("target directory %v exists and is not empty", dir) 369 } 370 371 // Open the zip file and ensure it's under the size limit. 372 f, err := os.Open(zipFile) 373 if err != nil { 374 return err 375 } 376 defer f.Close() 377 info, err := f.Stat() 378 if err != nil { 379 return err 380 } 381 zipSize := info.Size() 382 if zipSize > MaxZipFile { 383 return fmt.Errorf("module zip file is too large (%d bytes; limit is %d bytes)", zipSize, MaxZipFile) 384 } 385 386 z, err := zip.NewReader(f, zipSize) 387 if err != nil { 388 return err 389 } 390 391 // Check total size, valid file names. 392 collisions := make(collisionChecker) 393 prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version) 394 var size int64 395 for _, zf := range z.File { 396 if !strings.HasPrefix(zf.Name, prefix) { 397 return fmt.Errorf("unexpected file name %s", zf.Name) 398 } 399 name := zf.Name[len(prefix):] 400 if name == "" { 401 continue 402 } 403 isDir := strings.HasSuffix(name, "/") 404 if isDir { 405 name = name[:len(name)-1] 406 } 407 if path.Clean(name) != name { 408 return fmt.Errorf("invalid file name %s", zf.Name) 409 } 410 if err := module.CheckFilePath(name); err != nil { 411 return err 412 } 413 if err := collisions.check(name, isDir); err != nil { 414 return err 415 } 416 if isDir { 417 continue 418 } 419 if base := path.Base(name); strings.EqualFold(base, "go.mod") { 420 if base != name { 421 return fmt.Errorf("found go.mod file not in module root directory (%s)", zf.Name) 422 } else if name != "go.mod" { 423 return fmt.Errorf("found file named %s, want all lower-case go.mod", zf.Name) 424 } 425 } 426 s := int64(zf.UncompressedSize64) 427 if s < 0 || MaxZipFile-size < s { 428 return fmt.Errorf("total uncompressed size of module contents too large (max size is %d bytes)", MaxZipFile) 429 } 430 size += s 431 if name == "go.mod" && s > MaxGoMod { 432 return fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod) 433 } 434 if name == "LICENSE" && s > MaxLICENSE { 435 return fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE) 436 } 437 } 438 439 // Unzip, enforcing sizes checked earlier. 440 if err := os.MkdirAll(dir, 0777); err != nil { 441 return err 442 } 443 for _, zf := range z.File { 444 name := zf.Name[len(prefix):] 445 if name == "" || strings.HasSuffix(name, "/") { 446 continue 447 } 448 dst := filepath.Join(dir, name) 449 if err := os.MkdirAll(filepath.Dir(dst), 0777); err != nil { 450 return err 451 } 452 w, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0444) 453 if err != nil { 454 return err 455 } 456 r, err := zf.Open() 457 if err != nil { 458 w.Close() 459 return err 460 } 461 lr := &io.LimitedReader{R: r, N: int64(zf.UncompressedSize64) + 1} 462 _, err = io.Copy(w, lr) 463 r.Close() 464 if err != nil { 465 w.Close() 466 return err 467 } 468 if err := w.Close(); err != nil { 469 return err 470 } 471 if lr.N <= 0 { 472 return fmt.Errorf("uncompressed size of file %s is larger than declared size (%d bytes)", zf.Name, zf.UncompressedSize64) 473 } 474 } 475 476 return nil 477} 478 479// collisionChecker finds case-insensitive name collisions and paths that 480// are listed as both files and directories. 481// 482// The keys of this map are processed with strToFold. pathInfo has the original 483// path for each folded path. 484type collisionChecker map[string]pathInfo 485 486type pathInfo struct { 487 path string 488 isDir bool 489} 490 491func (cc collisionChecker) check(p string, isDir bool) error { 492 fold := strToFold(p) 493 if other, ok := cc[fold]; ok { 494 if p != other.path { 495 return fmt.Errorf("case-insensitive file name collision: %q and %q", other.path, p) 496 } 497 if isDir != other.isDir { 498 return fmt.Errorf("entry %q is both a file and a directory", p) 499 } 500 if !isDir { 501 return fmt.Errorf("multiple entries for file %q", p) 502 } 503 // It's not an error if check is called with the same directory multiple 504 // times. check is called recursively on parent directories, so check 505 // may be called on the same directory many times. 506 } else { 507 cc[fold] = pathInfo{path: p, isDir: isDir} 508 } 509 510 if parent := path.Dir(p); parent != "." { 511 return cc.check(parent, true) 512 } 513 return nil 514} 515 516type zipError struct { 517 verb, path string 518 err error 519} 520 521func (e *zipError) Error() string { 522 if e.path == "" { 523 return fmt.Sprintf("%s: %v", e.verb, e.err) 524 } else { 525 return fmt.Sprintf("%s %s: %v", e.verb, e.path, e.err) 526 } 527} 528 529func (e *zipError) Unwrap() error { 530 return e.err 531} 532 533// strToFold returns a string with the property that 534// strings.EqualFold(s, t) iff strToFold(s) == strToFold(t) 535// This lets us test a large set of strings for fold-equivalent 536// duplicates without making a quadratic number of calls 537// to EqualFold. Note that strings.ToUpper and strings.ToLower 538// do not have the desired property in some corner cases. 539func strToFold(s string) string { 540 // Fast path: all ASCII, no upper case. 541 // Most paths look like this already. 542 for i := 0; i < len(s); i++ { 543 c := s[i] 544 if c >= utf8.RuneSelf || 'A' <= c && c <= 'Z' { 545 goto Slow 546 } 547 } 548 return s 549 550Slow: 551 var buf bytes.Buffer 552 for _, r := range s { 553 // SimpleFold(x) cycles to the next equivalent rune > x 554 // or wraps around to smaller values. Iterate until it wraps, 555 // and we've found the minimum value. 556 for { 557 r0 := r 558 r = unicode.SimpleFold(r0) 559 if r <= r0 { 560 break 561 } 562 } 563 // Exception to allow fast path above: A-Z => a-z 564 if 'A' <= r && r <= 'Z' { 565 r += 'a' - 'A' 566 } 567 buf.WriteRune(r) 568 } 569 return buf.String() 570} 571