1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package zip provides functions for creating and extracting module zip files.
6//
7// Module zip files have several restrictions listed below. These are necessary
8// to ensure that module zip files can be extracted consistently on supported
9// platforms and file systems.
10//
11// • All file paths within a zip file must start with "<module>@<version>/",
12// where "<module>" is the module path and "<version>" is the version.
13// The module path must be valid (see golang.org/x/mod/module.CheckPath).
14// The version must be valid and canonical (see
15// golang.org/x/mod/module.CanonicalVersion). The path must have a major
16// version suffix consistent with the version (see
17// golang.org/x/mod/module.Check). The part of the file path after the
18// "<module>@<version>/" prefix must be valid (see
19// golang.org/x/mod/module.CheckFilePath).
20//
21// • No two file paths may be equal under Unicode case-folding (see
22// strings.EqualFold).
23//
24// • A go.mod file may or may not appear in the top-level directory. If present,
25// it must be named "go.mod", not any other case. Files named "go.mod"
26// are not allowed in any other directory.
27//
28// • The total size in bytes of a module zip file may be at most MaxZipFile
29// bytes (500 MiB). The total uncompressed size of the files within the
30// zip may also be at most MaxZipFile bytes.
31//
32// • Each file's uncompressed size must match its declared 64-bit uncompressed
33// size in the zip file header.
34//
35// • If the zip contains files named "<module>@<version>/go.mod" or
36// "<module>@<version>/LICENSE", their sizes in bytes may be at most
37// MaxGoMod or MaxLICENSE, respectively (both are 16 MiB).
38//
39// • Empty directories are ignored. File permissions and timestamps are also
40// ignored.
41//
42// • Symbolic links and other irregular files are not allowed.
43//
44// Note that this package does not provide hashing functionality. See
45// golang.org/x/mod/sumdb/dirhash.
46package zip
47
48import (
49	"archive/zip"
50	"bytes"
51	"fmt"
52	"io"
53	"io/ioutil"
54	"os"
55	"path"
56	"path/filepath"
57	"strings"
58	"unicode"
59	"unicode/utf8"
60
61	"golang.org/x/mod/module"
62)
63
const (
	// MaxZipFile caps, in bytes, both the size of a module zip file and the
	// total size of its extracted content (500 MiB). The go command reports
	// an error when either one exceeds this value.
	MaxZipFile = 500 * 1024 * 1024

	// MaxGoMod caps, in bytes, the size of the go.mod file stored in a
	// module zip file (16 MiB).
	MaxGoMod = 16 * 1024 * 1024

	// MaxLICENSE caps, in bytes, the size of the LICENSE file stored in a
	// module zip file (16 MiB).
	MaxLICENSE = 16 * 1024 * 1024
)
78
// File abstracts a single entry that may be placed in a module zip. The
// entry can come from a directory on disk, from another zip, or from any
// other source that behaves like a file.
type File interface {
	// Path reports where the file lives relative to the module root
	// directory, as a clean, slash-separated, relative path.
	Path() string

	// Lstat describes the file. For a symbolic link, the returned
	// information is about the link itself rather than its target.
	Lstat() (os.FileInfo, error)

	// Open gives access to the contents of a regular file. Calling Open on
	// a directory or a symbolic link may return an error.
	Open() (io.ReadCloser, error)
}
94
95// Create builds a zip archive for module m from an abstract list of files
96// and writes it to w.
97//
98// Create verifies the restrictions described in the package documentation
99// and should not produce an archive that Unzip cannot extract. Create does not
100// include files in the output archive if they don't belong in the module zip.
101// In particular, Create will not include files in modules found in
102// subdirectories, most files in vendor directories, or irregular files (such
103// as symbolic links) in the output archive.
104func Create(w io.Writer, m module.Version, files []File) (err error) {
105	defer func() {
106		if err != nil {
107			err = &zipError{verb: "create zip", err: err}
108		}
109	}()
110
111	// Check that the version is canonical, the module path is well-formed, and
112	// the major version suffix matches the major version.
113	if vers := module.CanonicalVersion(m.Version); vers != m.Version {
114		return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
115	}
116	if err := module.Check(m.Path, m.Version); err != nil {
117		return err
118	}
119
120	// Find directories containing go.mod files (other than the root).
121	// These directories will not be included in the output zip.
122	haveGoMod := make(map[string]bool)
123	for _, f := range files {
124		dir, base := path.Split(f.Path())
125		if strings.EqualFold(base, "go.mod") {
126			info, err := f.Lstat()
127			if err != nil {
128				return err
129			}
130			if info.Mode().IsRegular() {
131				haveGoMod[dir] = true
132			}
133		}
134	}
135
136	inSubmodule := func(p string) bool {
137		for {
138			dir, _ := path.Split(p)
139			if dir == "" {
140				return false
141			}
142			if haveGoMod[dir] {
143				return true
144			}
145			p = dir[:len(dir)-1]
146		}
147	}
148
149	// Create the module zip file.
150	zw := zip.NewWriter(w)
151	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
152
153	addFile := func(f File, path string, size int64) error {
154		rc, err := f.Open()
155		if err != nil {
156			return err
157		}
158		defer rc.Close()
159		w, err := zw.Create(prefix + path)
160		if err != nil {
161			return err
162		}
163		lr := &io.LimitedReader{R: rc, N: size + 1}
164		if _, err := io.Copy(w, lr); err != nil {
165			return err
166		}
167		if lr.N <= 0 {
168			return fmt.Errorf("file %q is larger than declared size", path)
169		}
170		return nil
171	}
172
173	collisions := make(collisionChecker)
174	maxSize := int64(MaxZipFile)
175	for _, f := range files {
176		p := f.Path()
177		if p != path.Clean(p) {
178			return fmt.Errorf("file path %s is not clean", p)
179		}
180		if path.IsAbs(p) {
181			return fmt.Errorf("file path %s is not relative", p)
182		}
183		if isVendoredPackage(p) || inSubmodule(p) {
184			continue
185		}
186		if p == ".hg_archival.txt" {
187			// Inserted by hg archive.
188			// The go command drops this regardless of the VCS being used.
189			continue
190		}
191		if err := module.CheckFilePath(p); err != nil {
192			return err
193		}
194		if strings.ToLower(p) == "go.mod" && p != "go.mod" {
195			return fmt.Errorf("found file named %s, want all lower-case go.mod", p)
196		}
197		info, err := f.Lstat()
198		if err != nil {
199			return err
200		}
201		if err := collisions.check(p, info.IsDir()); err != nil {
202			return err
203		}
204		if !info.Mode().IsRegular() {
205			// Skip symbolic links (golang.org/issue/27093).
206			continue
207		}
208		size := info.Size()
209		if size < 0 || maxSize < size {
210			return fmt.Errorf("module source tree too large (max size is %d bytes)", MaxZipFile)
211		}
212		maxSize -= size
213		if p == "go.mod" && size > MaxGoMod {
214			return fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod)
215		}
216		if p == "LICENSE" && size > MaxLICENSE {
217			return fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE)
218		}
219
220		if err := addFile(f, p, size); err != nil {
221			return err
222		}
223	}
224
225	return zw.Close()
226}
227
228// CreateFromDir creates a module zip file for module m from the contents of
229// a directory, dir. The zip content is written to w.
230//
231// CreateFromDir verifies the restrictions described in the package
232// documentation and should not produce an archive that Unzip cannot extract.
233// CreateFromDir does not include files in the output archive if they don't
234// belong in the module zip. In particular, CreateFromDir will not include
235// files in modules found in subdirectories, most files in vendor directories,
236// or irregular files (such as symbolic links) in the output archive.
237// Additionally, unlike Create, CreateFromDir will not include directories
238// named ".bzr", ".git", ".hg", or ".svn".
239func CreateFromDir(w io.Writer, m module.Version, dir string) (err error) {
240	defer func() {
241		if zerr, ok := err.(*zipError); ok {
242			zerr.path = dir
243		} else if err != nil {
244			err = &zipError{verb: "create zip", path: dir, err: err}
245		}
246	}()
247
248	var files []File
249	err = filepath.Walk(dir, func(filePath string, info os.FileInfo, err error) error {
250		if err != nil {
251			return err
252		}
253		relPath, err := filepath.Rel(dir, filePath)
254		if err != nil {
255			return err
256		}
257		slashPath := filepath.ToSlash(relPath)
258
259		if info.IsDir() {
260			if filePath == dir {
261				// Don't skip the top-level directory.
262				return nil
263			}
264
265			// Skip VCS directories.
266			// fossil repos are regular files with arbitrary names, so we don't try
267			// to exclude them.
268			switch filepath.Base(filePath) {
269			case ".bzr", ".git", ".hg", ".svn":
270				return filepath.SkipDir
271			}
272
273			// Skip some subdirectories inside vendor, but maintain bug
274			// golang.org/issue/31562, described in isVendoredPackage.
275			// We would like Create and CreateFromDir to produce the same result
276			// for a set of files, whether expressed as a directory tree or zip.
277			if isVendoredPackage(slashPath) {
278				return filepath.SkipDir
279			}
280
281			// Skip submodules (directories containing go.mod files).
282			if goModInfo, err := os.Lstat(filepath.Join(filePath, "go.mod")); err == nil && !goModInfo.IsDir() {
283				return filepath.SkipDir
284			}
285			return nil
286		}
287
288		if info.Mode().IsRegular() {
289			if !isVendoredPackage(slashPath) {
290				files = append(files, dirFile{
291					filePath:  filePath,
292					slashPath: slashPath,
293					info:      info,
294				})
295			}
296			return nil
297		}
298
299		// Not a regular file or a directory. Probably a symbolic link.
300		// Irregular files are ignored, so skip it.
301		return nil
302	})
303	if err != nil {
304		return err
305	}
306
307	return Create(w, m, files)
308}
309
// dirFile is the File implementation used for files found on disk by
// CreateFromDir.
type dirFile struct {
	filePath  string      // path used to open the file on disk
	slashPath string      // slash-separated path reported by Path
	info      os.FileInfo // file information reported by Lstat
}

// Path returns the stored slash-separated path.
func (df dirFile) Path() string {
	return df.slashPath
}

// Lstat returns the stored file information; it never fails.
func (df dirFile) Lstat() (os.FileInfo, error) {
	return df.info, nil
}

// Open opens the file on disk for reading.
func (df dirFile) Open() (io.ReadCloser, error) {
	return os.Open(df.filePath)
}
318
// isVendoredPackage tries to report whether the file called name belongs to
// a package whose import path contains, but does not end with, the path
// component "vendor".
//
// The answer is not always right: files in any non-top-level package whose
// import path ends in "vendor" are reported as vendored too (a false
// positive).
func isVendoredPackage(name string) bool {
	var rest string
	switch {
	case strings.HasPrefix(name, "vendor/"):
		rest = name[len("vendor/"):]
	case strings.Contains(name, "/vendor/"):
		// This offset looks incorrect: it ignores where "/vendor/" was
		// found, and should probably be the index of the match plus
		// len("/vendor/").
		// (See https://golang.org/issue/31562 and https://golang.org/issue/37397.)
		// Unfortunately, it can't be fixed without invalidating module
		// checksums.
		rest = name[len("/vendor/"):]
	default:
		return false
	}
	return strings.Contains(rest, "/")
}
342
343// Unzip extracts the contents of a module zip file to a directory.
344//
345// Unzip checks all restrictions listed in the package documentation and returns
346// an error if the zip archive is not valid. In some cases, files may be written
347// to dir before an error is returned (for example, if a file's uncompressed
348// size does not match its declared size).
349//
350// dir may or may not exist: Unzip will create it and any missing parent
351// directories if it doesn't exist. If dir exists, it must be empty.
352func Unzip(dir string, m module.Version, zipFile string) (err error) {
353	defer func() {
354		if err != nil {
355			err = &zipError{verb: "unzip", path: zipFile, err: err}
356		}
357	}()
358
359	if vers := module.CanonicalVersion(m.Version); vers != m.Version {
360		return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
361	}
362	if err := module.Check(m.Path, m.Version); err != nil {
363		return err
364	}
365
366	// Check that the directory is empty. Don't create it yet in case there's
367	// an error reading the zip.
368	files, _ := ioutil.ReadDir(dir)
369	if len(files) > 0 {
370		return fmt.Errorf("target directory %v exists and is not empty", dir)
371	}
372
373	// Open the zip file and ensure it's under the size limit.
374	f, err := os.Open(zipFile)
375	if err != nil {
376		return err
377	}
378	defer f.Close()
379	info, err := f.Stat()
380	if err != nil {
381		return err
382	}
383	zipSize := info.Size()
384	if zipSize > MaxZipFile {
385		return fmt.Errorf("module zip file is too large (%d bytes; limit is %d bytes)", zipSize, MaxZipFile)
386	}
387
388	z, err := zip.NewReader(f, zipSize)
389	if err != nil {
390		return err
391	}
392
393	// Check total size, valid file names.
394	collisions := make(collisionChecker)
395	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
396	var size int64
397	for _, zf := range z.File {
398		if !strings.HasPrefix(zf.Name, prefix) {
399			return fmt.Errorf("unexpected file name %s", zf.Name)
400		}
401		name := zf.Name[len(prefix):]
402		if name == "" {
403			continue
404		}
405		isDir := strings.HasSuffix(name, "/")
406		if isDir {
407			name = name[:len(name)-1]
408		}
409		if path.Clean(name) != name {
410			return fmt.Errorf("invalid file name %s", zf.Name)
411		}
412		if err := module.CheckFilePath(name); err != nil {
413			return err
414		}
415		if err := collisions.check(name, isDir); err != nil {
416			return err
417		}
418		if isDir {
419			continue
420		}
421		if base := path.Base(name); strings.EqualFold(base, "go.mod") {
422			if base != name {
423				return fmt.Errorf("found go.mod file not in module root directory (%s)", zf.Name)
424			} else if name != "go.mod" {
425				return fmt.Errorf("found file named %s, want all lower-case go.mod", zf.Name)
426			}
427		}
428		s := int64(zf.UncompressedSize64)
429		if s < 0 || MaxZipFile-size < s {
430			return fmt.Errorf("total uncompressed size of module contents too large (max size is %d bytes)", MaxZipFile)
431		}
432		size += s
433		if name == "go.mod" && s > MaxGoMod {
434			return fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod)
435		}
436		if name == "LICENSE" && s > MaxLICENSE {
437			return fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE)
438		}
439	}
440
441	// Unzip, enforcing sizes checked earlier.
442	if err := os.MkdirAll(dir, 0777); err != nil {
443		return err
444	}
445	for _, zf := range z.File {
446		name := zf.Name[len(prefix):]
447		if name == "" || strings.HasSuffix(name, "/") {
448			continue
449		}
450		dst := filepath.Join(dir, name)
451		if err := os.MkdirAll(filepath.Dir(dst), 0777); err != nil {
452			return err
453		}
454		w, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0444)
455		if err != nil {
456			return err
457		}
458		r, err := zf.Open()
459		if err != nil {
460			w.Close()
461			return err
462		}
463		lr := &io.LimitedReader{R: r, N: int64(zf.UncompressedSize64) + 1}
464		_, err = io.Copy(w, lr)
465		r.Close()
466		if err != nil {
467			w.Close()
468			return err
469		}
470		if err := w.Close(); err != nil {
471			return err
472		}
473		if lr.N <= 0 {
474			return fmt.Errorf("uncompressed size of file %s is larger than declared size (%d bytes)", zf.Name, zf.UncompressedSize64)
475		}
476	}
477
478	return nil
479}
480
481// collisionChecker finds case-insensitive name collisions and paths that
482// are listed as both files and directories.
483//
484// The keys of this map are processed with strToFold. pathInfo has the original
485// path for each folded path.
486type collisionChecker map[string]pathInfo
487
488type pathInfo struct {
489	path  string
490	isDir bool
491}
492
493func (cc collisionChecker) check(p string, isDir bool) error {
494	fold := strToFold(p)
495	if other, ok := cc[fold]; ok {
496		if p != other.path {
497			return fmt.Errorf("case-insensitive file name collision: %q and %q", other.path, p)
498		}
499		if isDir != other.isDir {
500			return fmt.Errorf("entry %q is both a file and a directory", p)
501		}
502		if !isDir {
503			return fmt.Errorf("multiple entries for file %q", p)
504		}
505		// It's not an error if check is called with the same directory multiple
506		// times. check is called recursively on parent directories, so check
507		// may be called on the same directory many times.
508	} else {
509		cc[fold] = pathInfo{path: p, isDir: isDir}
510	}
511
512	if parent := path.Dir(p); parent != "." {
513		return cc.check(parent, true)
514	}
515	return nil
516}
517
// zipError is the error type used by this package. It records the operation
// that failed (verb), optionally the path it was applied to, and the
// underlying error.
type zipError struct {
	verb, path string
	err        error
}

// Error formats the error as "verb: err", or as "verb path: err" when a path
// is set.
func (e *zipError) Error() string {
	if e.path != "" {
		return fmt.Sprintf("%s %s: %v", e.verb, e.path, e.err)
	}
	return fmt.Sprintf("%s: %v", e.verb, e.err)
}

// Unwrap returns the underlying error.
func (e *zipError) Unwrap() error {
	return e.err
}
534
// strToFold maps s to a canonical form chosen so that
//	strings.EqualFold(s, t) iff strToFold(s) == strToFold(t)
// With it, a large set of strings can be searched for fold-equivalent
// duplicates by comparing canonical forms, instead of making a quadratic
// number of EqualFold calls. strings.ToUpper and strings.ToLower cannot be
// used for this because they lack the property in some corner cases.
func strToFold(s string) string {
	// Fast path: a string of ASCII bytes without upper-case letters is
	// returned unchanged. Most paths look like this already.
	canonical := true
	for i := 0; i < len(s); i++ {
		if b := s[i]; b >= utf8.RuneSelf || ('A' <= b && b <= 'Z') {
			canonical = false
			break
		}
	}
	if canonical {
		return s
	}

	var buf bytes.Buffer
	for _, r := range s {
		// SimpleFold steps to the next larger equivalent rune, or wraps
		// around to the smallest one. Keep stepping until it wraps; the rune
		// it lands on is the minimum of the equivalence class.
		for {
			next := unicode.SimpleFold(r)
			wrapped := next <= r
			r = next
			if wrapped {
				break
			}
		}
		// Exception that keeps the fast path above valid: A-Z => a-z.
		if 'A' <= r && r <= 'Z' {
			r += 'a' - 'A'
		}
		buf.WriteRune(r)
	}
	return buf.String()
}
573