1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package zip provides functions for creating and extracting module zip files.
6//
7// Module zip files have several restrictions listed below. These are necessary
8// to ensure that module zip files can be extracted consistently on supported
9// platforms and file systems.
10//
11// • All file paths within a zip file must start with "<module>@<version>/",
12// where "<module>" is the module path and "<version>" is the version.
13// The module path must be valid (see golang.org/x/mod/module.CheckPath).
14// The version must be valid and canonical (see
15// golang.org/x/mod/module.CanonicalVersion). The path must have a major
16// version suffix consistent with the version (see
17// golang.org/x/mod/module.Check). The part of the file path after the
18// "<module>@<version>/" prefix must be valid (see
19// golang.org/x/mod/module.CheckFilePath).
20//
21// • No two file paths may be equal under Unicode case-folding (see
22// strings.EqualFold).
23//
24// • A go.mod file may or may not appear in the top-level directory. If present,
25// it must be named "go.mod", not any other case. Files named "go.mod"
26// are not allowed in any other directory.
27//
28// • The total size in bytes of a module zip file may be at most MaxZipFile
29// bytes (500 MiB). The total uncompressed size of the files within the
30// zip may also be at most MaxZipFile bytes.
31//
32// • Each file's uncompressed size must match its declared 64-bit uncompressed
33// size in the zip file header.
34//
35// • If the zip contains files named "<module>@<version>/go.mod" or
36// "<module>@<version>/LICENSE", their sizes in bytes may be at most
37// MaxGoMod or MaxLICENSE, respectively (both are 16 MiB).
38//
39// • Empty directories are ignored. File permissions and timestamps are also
40// ignored.
41//
42// • Symbolic links and other irregular files are not allowed.
43//
44// Note that this package does not provide hashing functionality. See
45// golang.org/x/mod/sumdb/dirhash.
46package zip
47
48import (
49	"archive/zip"
50	"bytes"
51	"fmt"
52	"io"
53	"io/ioutil"
54	"os"
55	"path"
56	"path/filepath"
57	"strings"
58	"unicode"
59	"unicode/utf8"
60
61	"golang.org/x/mod/module"
62)
63
// Size limits, in bytes, applied to module zip files and to selected files
// inside them.
const (
	// MaxZipFile is the largest permitted module zip file (500 MiB). Unzip
	// rejects zip files bigger than this, and both Create and Unzip reject
	// content whose total uncompressed size exceeds it.
	MaxZipFile = 500 * (1 << 20)

	// MaxGoMod is the largest permitted go.mod file in the module root
	// directory (16 MiB).
	MaxGoMod = 16 * (1 << 20)

	// MaxLICENSE is the largest permitted LICENSE file in the module root
	// directory (16 MiB).
	MaxLICENSE = 16 * (1 << 20)
)
78
// File provides an abstraction for a file in a directory, zip, or anything
// else that looks like a file.
//
// Create reads its input exclusively through this interface: Path decides
// where an entry is placed in the archive, Lstat decides whether the entry is
// included and how many bytes are expected, and Open supplies the content.
type File interface {
	// Path returns a clean slash-separated relative path from the module root
	// directory to the file. Create returns an error if the path is not clean
	// or is absolute.
	Path() string

	// Lstat returns information about the file. If the file is a symbolic link,
	// Lstat returns information about the link itself, not the file it points to.
	// Create only adds entries whose mode is regular, and treats the reported
	// size as the upper bound on the number of bytes read from Open.
	Lstat() (os.FileInfo, error)

	// Open provides access to the data within a regular file. Open may return
	// an error if called on a directory or symbolic link. Create closes the
	// returned reader once the content has been copied.
	Open() (io.ReadCloser, error)
}
94
// Create builds a zip archive for module m from an abstract list of files
// and writes it to w.
//
// Create verifies the restrictions described in the package documentation
// and should not produce an archive that Unzip cannot extract. Create does not
// include files in the output archive if they don't belong in the module zip.
// In particular, Create will not include files in modules found in
// subdirectories, most files in vendor directories, or irregular files (such
// as symbolic links) in the output archive.
//
// Every entry is stored under the "<module>@<version>/" prefix. Files are
// processed in the order given; the first violation stops processing and is
// returned wrapped in an error whose message starts with "create zip". Part
// of the archive may already have been written to w when an error is returned.
func Create(w io.Writer, m module.Version, files []File) (err error) {
	// Wrap whatever error is returned below so callers see the operation name.
	defer func() {
		if err != nil {
			err = &zipError{verb: "create zip", err: err}
		}
	}()

	// Check that the version is canonical, the module path is well-formed, and
	// the major version suffix matches the major version.
	if vers := module.CanonicalVersion(m.Version); vers != m.Version {
		return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
	}
	if err := module.Check(m.Path, m.Version); err != nil {
		return err
	}

	// Find directories containing go.mod files (other than the root).
	// These directories will not be included in the output zip.
	//
	// Keys are the directory part returned by path.Split, so they end in "/"
	// (the root directory is recorded as ""). The file name is compared
	// case-insensitively, and only regular files count.
	haveGoMod := make(map[string]bool)
	for _, f := range files {
		dir, base := path.Split(f.Path())
		if strings.EqualFold(base, "go.mod") {
			info, err := f.Lstat()
			if err != nil {
				return err
			}
			if info.Mode().IsRegular() {
				haveGoMod[dir] = true
			}
		}
	}

	// inSubmodule reports whether any directory above p, excluding the module
	// root, was recorded in haveGoMod. The walk stops (returning false) as
	// soon as the directory part becomes empty, so the root entry "" is never
	// consulted.
	inSubmodule := func(p string) bool {
		for {
			dir, _ := path.Split(p)
			if dir == "" {
				return false
			}
			if haveGoMod[dir] {
				return true
			}
			// Drop the trailing slash and continue with the parent directory.
			p = dir[:len(dir)-1]
		}
	}

	// Create the module zip file.
	zw := zip.NewWriter(w)
	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)

	// addFile copies the content of f into a new archive entry named
	// prefix+path. Note that the parameter named path shadows the path package
	// and the local w shadows the outer writer inside this closure.
	addFile := func(f File, path string, size int64) error {
		rc, err := f.Open()
		if err != nil {
			return err
		}
		defer rc.Close()
		w, err := zw.Create(prefix + path)
		if err != nil {
			return err
		}
		// Allow one byte more than the size reported by Lstat: if that extra
		// byte is consumed (N reaches 0), the file holds more data than was
		// counted against the size limits, which is reported as an error.
		lr := &io.LimitedReader{R: rc, N: size + 1}
		if _, err := io.Copy(w, lr); err != nil {
			return err
		}
		if lr.N <= 0 {
			return fmt.Errorf("file %q is larger than declared size", path)
		}
		return nil
	}

	collisions := make(collisionChecker)
	// maxSize is the remaining uncompressed-size budget; it shrinks as files
	// are accepted.
	maxSize := int64(MaxZipFile)
	for _, f := range files {
		p := f.Path()
		if p != path.Clean(p) {
			return fmt.Errorf("file path %s is not clean", p)
		}
		if path.IsAbs(p) {
			return fmt.Errorf("file path %s is not relative", p)
		}
		// Excluded files are skipped before any further validation, so an
		// invalid name inside a vendored package or submodule is not an error.
		if isVendoredPackage(p) || inSubmodule(p) {
			continue
		}
		if p == ".hg_archival.txt" {
			// Inserted by hg archive.
			// The go command drops this regardless of the VCS being used.
			continue
		}
		if err := module.CheckFilePath(p); err != nil {
			return err
		}
		// Only the root-level go.mod reaches this point with that base name
		// (nested ones are excluded as submodules above); it must be spelled
		// in lower case.
		if strings.ToLower(p) == "go.mod" && p != "go.mod" {
			return fmt.Errorf("found file named %s, want all lower-case go.mod", p)
		}
		info, err := f.Lstat()
		if err != nil {
			return err
		}
		// The collision check runs before the regular-file filter, so
		// directories and irregular files still take part in collision
		// detection even though they are not written to the archive.
		if err := collisions.check(p, info.IsDir()); err != nil {
			return err
		}
		if !info.Mode().IsRegular() {
			// Skip symbolic links (golang.org/issue/27093).
			continue
		}
		size := info.Size()
		if size < 0 || maxSize < size {
			return fmt.Errorf("module source tree too large (max size is %d bytes)", MaxZipFile)
		}
		maxSize -= size
		if p == "go.mod" && size > MaxGoMod {
			return fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod)
		}
		if p == "LICENSE" && size > MaxLICENSE {
			return fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE)
		}

		if err := addFile(f, p, size); err != nil {
			return err
		}
	}

	// Closing the zip writer finishes the archive; its error is the result.
	return zw.Close()
}
227
228// CreateFromDir creates a module zip file for module m from the contents of
229// a directory, dir. The zip content is written to w.
230//
231// CreateFromDir verifies the restrictions described in the package
232// documentation and should not produce an archive that Unzip cannot extract.
233// CreateFromDir does not include files in the output archive if they don't
234// belong in the module zip. In particular, CreateFromDir will not include
235// files in modules found in subdirectories, most files in vendor directories,
236// or irregular files (such as symbolic links) in the output archive.
237// Additionally, unlike Create, CreateFromDir will not include directories
238// named ".bzr", ".git", ".hg", or ".svn".
239func CreateFromDir(w io.Writer, m module.Version, dir string) (err error) {
240	defer func() {
241		if zerr, ok := err.(*zipError); ok {
242			zerr.path = dir
243		} else if err != nil {
244			err = &zipError{verb: "create zip", path: dir, err: err}
245		}
246	}()
247
248	var files []File
249	err = filepath.Walk(dir, func(filePath string, info os.FileInfo, err error) error {
250		relPath, err := filepath.Rel(dir, filePath)
251		if err != nil {
252			return err
253		}
254		slashPath := filepath.ToSlash(relPath)
255
256		if info.IsDir() {
257			if filePath == dir {
258				// Don't skip the top-level directory.
259				return nil
260			}
261
262			// Skip VCS directories.
263			// fossil repos are regular files with arbitrary names, so we don't try
264			// to exclude them.
265			switch filepath.Base(filePath) {
266			case ".bzr", ".git", ".hg", ".svn":
267				return filepath.SkipDir
268			}
269
270			// Skip some subdirectories inside vendor, but maintain bug
271			// golang.org/issue/31562, described in isVendoredPackage.
272			// We would like Create and CreateFromDir to produce the same result
273			// for a set of files, whether expressed as a directory tree or zip.
274			if isVendoredPackage(slashPath) {
275				return filepath.SkipDir
276			}
277
278			// Skip submodules (directories containing go.mod files).
279			if goModInfo, err := os.Lstat(filepath.Join(filePath, "go.mod")); err == nil && !goModInfo.IsDir() {
280				return filepath.SkipDir
281			}
282			return nil
283		}
284
285		if info.Mode().IsRegular() {
286			if !isVendoredPackage(slashPath) {
287				files = append(files, dirFile{
288					filePath:  filePath,
289					slashPath: slashPath,
290					info:      info,
291				})
292			}
293			return nil
294		}
295
296		// Not a regular file or a directory. Probably a symbolic link.
297		// Irregular files are ignored, so skip it.
298		return nil
299	})
300	if err != nil {
301		return err
302	}
303
304	return Create(w, m, files)
305}
306
// dirFile is the File implementation CreateFromDir uses for regular files it
// finds while walking a directory tree.
type dirFile struct {
	filePath  string      // path passed to os.Open to read the content
	slashPath string      // slash-separated path relative to the walked directory
	info      os.FileInfo // file information captured during the walk
}

// Path returns the slash-separated path of the file relative to the walked
// directory.
func (df dirFile) Path() string {
	return df.slashPath
}

// Lstat returns the file information captured during the walk; it never
// fails.
func (df dirFile) Lstat() (os.FileInfo, error) {
	return df.info, nil
}

// Open opens the underlying file on disk for reading.
func (df dirFile) Open() (io.ReadCloser, error) {
	return os.Open(df.filePath)
}
315
// isVendoredPackage reports whether the slash-separated path name is treated
// as part of a vendored package and therefore left out of module zips.
//
// For a path starting with "vendor/", the result is true when the remainder
// contains another slash, so files directly inside the top-level vendor
// directory (such as vendor/modules.txt) are not considered vendored.
//
// For a path containing "/vendor/" elsewhere, the remainder is taken from the
// fixed offset len("/vendor/") rather than from the end of the match. That
// looks incorrect: the offset should probably be the match index plus
// len("/vendor/") (see https://golang.org/issue/31562). As written, every
// path with "/vendor/" after its first byte is reported as vendored, because
// the remainder always includes the slash that closes the match. The behavior
// is preserved deliberately: changing it would alter the contents of module
// zips and invalidate checksums.
func isVendoredPackage(name string) bool {
	const (
		rootVendor   = "vendor/"
		nestedVendor = "/vendor/"
	)

	var rest string
	switch {
	case strings.HasPrefix(name, rootVendor):
		rest = name[len(rootVendor):]
	case strings.Contains(name, nestedVendor):
		// Fixed offset kept on purpose; see the function comment.
		rest = name[len(nestedVendor):]
	default:
		return false
	}
	return strings.IndexByte(rest, '/') >= 0
}
340
// Unzip extracts the contents of a module zip file to a directory.
//
// Unzip checks all restrictions listed in the package documentation and returns
// an error if the zip archive is not valid. In some cases, files may be written
// to dir before an error is returned (for example, if a file's uncompressed
// size does not match its declared size).
//
// dir may or may not exist: Unzip will create it and any missing parent
// directories if it doesn't exist. If dir exists, it must be empty.
//
// Extraction works in two passes: every entry name and declared size is
// validated first, and only then are files written. Files are created
// read-only (mode 0444 before umask); directory entries in the archive are
// not created on their own, only as parents of extracted files. Returned
// errors are wrapped so that the message starts with "unzip" and the zip
// file's path.
func Unzip(dir string, m module.Version, zipFile string) (err error) {
	// Wrap whatever error is returned below with the operation and zip path.
	defer func() {
		if err != nil {
			err = &zipError{verb: "unzip", path: zipFile, err: err}
		}
	}()

	if vers := module.CanonicalVersion(m.Version); vers != m.Version {
		return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
	}
	if err := module.Check(m.Path, m.Version); err != nil {
		return err
	}

	// Check that the directory is empty. Don't create it yet in case there's
	// an error reading the zip.
	// The ReadDir error is deliberately ignored: a directory that cannot be
	// listed (for example, because it does not exist yet) yields no entries
	// and is treated the same as an empty one.
	files, _ := ioutil.ReadDir(dir)
	if len(files) > 0 {
		return fmt.Errorf("target directory %v exists and is not empty", dir)
	}

	// Open the zip file and ensure it's under the size limit.
	f, err := os.Open(zipFile)
	if err != nil {
		return err
	}
	defer f.Close()
	info, err := f.Stat()
	if err != nil {
		return err
	}
	zipSize := info.Size()
	if zipSize > MaxZipFile {
		return fmt.Errorf("module zip file is too large (%d bytes; limit is %d bytes)", zipSize, MaxZipFile)
	}

	z, err := zip.NewReader(f, zipSize)
	if err != nil {
		return err
	}

	// Check total size, valid file names.
	collisions := make(collisionChecker)
	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
	// size accumulates the declared uncompressed sizes of the files seen so far.
	var size int64
	for _, zf := range z.File {
		if !strings.HasPrefix(zf.Name, prefix) {
			return fmt.Errorf("unexpected file name %s", zf.Name)
		}
		name := zf.Name[len(prefix):]
		if name == "" {
			// Entry for the "<module>@<version>/" directory itself.
			continue
		}
		// A trailing slash marks a directory entry; strip it before the
		// path checks below.
		isDir := strings.HasSuffix(name, "/")
		if isDir {
			name = name[:len(name)-1]
		}
		if path.Clean(name) != name {
			return fmt.Errorf("invalid file name %s", zf.Name)
		}
		if err := module.CheckFilePath(name); err != nil {
			return err
		}
		if err := collisions.check(name, isDir); err != nil {
			return err
		}
		if isDir {
			continue
		}
		// A file whose base name matches go.mod in any case is only allowed
		// in the module root, and there only with the exact name "go.mod".
		if base := path.Base(name); strings.EqualFold(base, "go.mod") {
			if base != name {
				return fmt.Errorf("found go.mod file not in module root directory (%s)", zf.Name)
			} else if name != "go.mod" {
				return fmt.Errorf("found file named %s, want all lower-case go.mod", zf.Name)
			}
		}
		// s < 0 catches declared sizes that overflow int64; the subtraction
		// form avoids overflowing the running total.
		s := int64(zf.UncompressedSize64)
		if s < 0 || MaxZipFile-size < s {
			return fmt.Errorf("total uncompressed size of module contents too large (max size is %d bytes)", MaxZipFile)
		}
		size += s
		if name == "go.mod" && s > MaxGoMod {
			return fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod)
		}
		if name == "LICENSE" && s > MaxLICENSE {
			return fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE)
		}
	}

	// Unzip, enforcing sizes checked earlier.
	if err := os.MkdirAll(dir, 0777); err != nil {
		return err
	}
	for _, zf := range z.File {
		// Every name was verified to start with prefix in the loop above.
		name := zf.Name[len(prefix):]
		if name == "" || strings.HasSuffix(name, "/") {
			// Directory entries are not extracted; parent directories are
			// created on demand for each file below.
			continue
		}
		dst := filepath.Join(dir, name)
		if err := os.MkdirAll(filepath.Dir(dst), 0777); err != nil {
			return err
		}
		// O_EXCL makes extraction fail rather than overwrite an existing file.
		w, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0444)
		if err != nil {
			return err
		}
		r, err := zf.Open()
		if err != nil {
			w.Close()
			return err
		}
		// Read at most one byte beyond the declared size: consuming that
		// extra byte (N reaches 0) means the entry is larger than declared.
		lr := &io.LimitedReader{R: r, N: int64(zf.UncompressedSize64) + 1}
		_, err = io.Copy(w, lr)
		r.Close()
		if err != nil {
			w.Close()
			return err
		}
		if err := w.Close(); err != nil {
			return err
		}
		// The size check happens after the file has been written and closed,
		// so the offending file is left on disk when this error is returned.
		if lr.N <= 0 {
			return fmt.Errorf("uncompressed size of file %s is larger than declared size (%d bytes)", zf.Name, zf.UncompressedSize64)
		}
	}

	return nil
}
478
479// collisionChecker finds case-insensitive name collisions and paths that
480// are listed as both files and directories.
481//
482// The keys of this map are processed with strToFold. pathInfo has the original
483// path for each folded path.
484type collisionChecker map[string]pathInfo
485
486type pathInfo struct {
487	path  string
488	isDir bool
489}
490
491func (cc collisionChecker) check(p string, isDir bool) error {
492	fold := strToFold(p)
493	if other, ok := cc[fold]; ok {
494		if p != other.path {
495			return fmt.Errorf("case-insensitive file name collision: %q and %q", other.path, p)
496		}
497		if isDir != other.isDir {
498			return fmt.Errorf("entry %q is both a file and a directory", p)
499		}
500		if !isDir {
501			return fmt.Errorf("multiple entries for file %q", p)
502		}
503		// It's not an error if check is called with the same directory multiple
504		// times. check is called recursively on parent directories, so check
505		// may be called on the same directory many times.
506	} else {
507		cc[fold] = pathInfo{path: p, isDir: isDir}
508	}
509
510	if parent := path.Dir(p); parent != "." {
511		return cc.check(parent, true)
512	}
513	return nil
514}
515
// zipError describes a failed zip operation: what was being done, optionally
// the path it was being done to, and the underlying cause.
type zipError struct {
	verb string // operation, such as "create zip" or "unzip"
	path string // file or directory involved; may be empty
	err  error  // underlying error
}

// Error formats the error as "verb path: cause", leaving out the path when it
// is empty.
func (e *zipError) Error() string {
	if e.path != "" {
		return fmt.Sprintf("%s %s: %v", e.verb, e.path, e.err)
	}
	return fmt.Sprintf("%s: %v", e.verb, e.err)
}

// Unwrap returns the underlying error.
func (e *zipError) Unwrap() error { return e.err }
532
// strToFold maps s to a canonical representative of its case-folding class,
// so that
//	strings.EqualFold(s, t) iff strToFold(s) == strToFold(t)
// Comparing folded strings lets a large set of names be checked for
// fold-equivalent duplicates with a map lookup instead of a quadratic number
// of EqualFold calls. strings.ToUpper and strings.ToLower cannot be used for
// this because they lack the property above in some corner cases.
func strToFold(s string) string {
	// Strings made only of ASCII bytes without upper-case letters are already
	// canonical; most paths are like that and are returned unchanged.
	needsFold := false
	for i := 0; i < len(s); i++ {
		if b := s[i]; b >= utf8.RuneSelf || ('A' <= b && b <= 'Z') {
			needsFold = true
			break
		}
	}
	if !needsFold {
		return s
	}

	var out bytes.Buffer
	for _, r := range s {
		// unicode.SimpleFold steps to the next larger equivalent rune and
		// wraps around to the smallest one. Keep stepping until the wrap
		// happens; the rune reached at that point is the minimum of the class.
		prev := r
		r = unicode.SimpleFold(prev)
		for r > prev {
			prev = r
			r = unicode.SimpleFold(prev)
		}
		// ASCII letters are canonicalized to lower case instead, so that the
		// unchanged-return shortcut above agrees with this loop.
		if 'A' <= r && r <= 'Z' {
			r += 'a' - 'A'
		}
		out.WriteRune(r)
	}
	return out.String()
}
571