1package archive // import "github.com/docker/docker/pkg/archive"
2
3import (
4	"archive/tar"
5	"errors"
6	"io"
7	"io/ioutil"
8	"os"
9	"path/filepath"
10	"strings"
11
12	"github.com/docker/docker/pkg/system"
13	"github.com/sirupsen/logrus"
14)
15
// Sentinel errors used or returned by this file.
var (
	// ErrNotDirectory reports that a path which has to be a directory
	// (such as the parent of a copy destination) is some other file type.
	ErrNotDirectory = errors.New("not a directory")

	// ErrDirNotExists reports that a destination was asserted to be a
	// directory but does not exist and cannot be created from a file source.
	ErrDirNotExists = errors.New("no such directory")

	// ErrCannotCopyDir reports an attempt to copy a directory onto an
	// existing non-directory destination.
	ErrCannotCopyDir = errors.New("cannot copy directory")

	// ErrInvalidCopySource reports that the content of a copy source is
	// invalid.
	ErrInvalidCopySource = errors.New("invalid copy source content")
)
23
24// PreserveTrailingDotOrSeparator returns the given cleaned path (after
25// processing using any utility functions from the path or filepath stdlib
26// packages) and appends a trailing `/.` or `/` if its corresponding  original
27// path (from before being processed by utility functions from the path or
28// filepath stdlib packages) ends with a trailing `/.` or `/`. If the cleaned
29// path already ends in a `.` path segment, then another is not added. If the
30// clean path already ends in the separator, then another is not added.
31func PreserveTrailingDotOrSeparator(cleanedPath string, originalPath string, sep byte) string {
32	// Ensure paths are in platform semantics
33	cleanedPath = strings.Replace(cleanedPath, "/", string(sep), -1)
34	originalPath = strings.Replace(originalPath, "/", string(sep), -1)
35
36	if !specifiesCurrentDir(cleanedPath) && specifiesCurrentDir(originalPath) {
37		if !hasTrailingPathSeparator(cleanedPath, sep) {
38			// Add a separator if it doesn't already end with one (a cleaned
39			// path would only end in a separator if it is the root).
40			cleanedPath += string(sep)
41		}
42		cleanedPath += "."
43	}
44
45	if !hasTrailingPathSeparator(cleanedPath, sep) && hasTrailingPathSeparator(originalPath, sep) {
46		cleanedPath += string(sep)
47	}
48
49	return cleanedPath
50}
51
52// assertsDirectory returns whether the given path is
53// asserted to be a directory, i.e., the path ends with
54// a trailing '/' or `/.`, assuming a path separator of `/`.
55func assertsDirectory(path string, sep byte) bool {
56	return hasTrailingPathSeparator(path, sep) || specifiesCurrentDir(path)
57}
58
// hasTrailingPathSeparator reports whether the last byte of the given path
// is the separator sep. An empty path never has a trailing separator.
func hasTrailingPathSeparator(path string, sep byte) bool {
	n := len(path)
	if n == 0 {
		return false
	}
	return path[n-1] == sep
}
64
// specifiesCurrentDir reports whether the given path specifies a "current
// directory", i.e. filepath.Base yields `.` for it. This covers paths whose
// last segment is `.` as well as the empty path.
func specifiesCurrentDir(path string) bool {
	lastSegment := filepath.Base(path)
	return lastSegment == "."
}
70
71// SplitPathDirEntry splits the given path between its directory name and its
72// basename by first cleaning the path but preserves a trailing "." if the
73// original path specified the current directory.
74func SplitPathDirEntry(path string) (dir, base string) {
75	cleanedPath := filepath.Clean(filepath.FromSlash(path))
76
77	if specifiesCurrentDir(path) {
78		cleanedPath += string(os.PathSeparator) + "."
79	}
80
81	return filepath.Dir(cleanedPath), filepath.Base(cleanedPath)
82}
83
84// TarResource archives the resource described by the given CopyInfo to a Tar
85// archive. A non-nil error is returned if sourcePath does not exist or is
86// asserted to be a directory but exists as another type of file.
87//
88// This function acts as a convenient wrapper around TarWithOptions, which
89// requires a directory as the source path. TarResource accepts either a
90// directory or a file path and correctly sets the Tar options.
91func TarResource(sourceInfo CopyInfo) (content io.ReadCloser, err error) {
92	return TarResourceRebase(sourceInfo.Path, sourceInfo.RebaseName)
93}
94
95// TarResourceRebase is like TarResource but renames the first path element of
96// items in the resulting tar archive to match the given rebaseName if not "".
97func TarResourceRebase(sourcePath, rebaseName string) (content io.ReadCloser, err error) {
98	sourcePath = normalizePath(sourcePath)
99	if _, err = os.Lstat(sourcePath); err != nil {
100		// Catches the case where the source does not exist or is not a
101		// directory if asserted to be a directory, as this also causes an
102		// error.
103		return
104	}
105
106	// Separate the source path between its directory and
107	// the entry in that directory which we are archiving.
108	sourceDir, sourceBase := SplitPathDirEntry(sourcePath)
109	opts := TarResourceRebaseOpts(sourceBase, rebaseName)
110
111	logrus.Debugf("copying %q from %q", sourceBase, sourceDir)
112	return TarWithOptions(sourceDir, opts)
113}
114
115// TarResourceRebaseOpts does not preform the Tar, but instead just creates the rebase
116// parameters to be sent to TarWithOptions (the TarOptions struct)
117func TarResourceRebaseOpts(sourceBase string, rebaseName string) *TarOptions {
118	filter := []string{sourceBase}
119	return &TarOptions{
120		Compression:      Uncompressed,
121		IncludeFiles:     filter,
122		IncludeSourceDir: true,
123		RebaseNames: map[string]string{
124			sourceBase: rebaseName,
125		},
126	}
127}
128
// CopyInfo describes the source or the destination path of a copy
// operation.
type CopyInfo struct {
	// Path is the (possibly symlink-resolved) path of the resource.
	Path string

	// Exists is true when the resource was found on disk.
	Exists bool

	// IsDir is true when the resource exists and is a directory.
	IsDir bool

	// RebaseName, if not empty, is the name that should replace the base
	// name of the resource in the entries of its archive.
	RebaseName string
}
137
138// CopyInfoSourcePath stats the given path to create a CopyInfo
139// struct representing that resource for the source of an archive copy
140// operation. The given path should be an absolute local path. A source path
141// has all symlinks evaluated that appear before the last path separator ("/"
142// on Unix). As it is to be a copy source, the path must exist.
143func CopyInfoSourcePath(path string, followLink bool) (CopyInfo, error) {
144	// normalize the file path and then evaluate the symbol link
145	// we will use the target file instead of the symbol link if
146	// followLink is set
147	path = normalizePath(path)
148
149	resolvedPath, rebaseName, err := ResolveHostSourcePath(path, followLink)
150	if err != nil {
151		return CopyInfo{}, err
152	}
153
154	stat, err := os.Lstat(resolvedPath)
155	if err != nil {
156		return CopyInfo{}, err
157	}
158
159	return CopyInfo{
160		Path:       resolvedPath,
161		Exists:     true,
162		IsDir:      stat.IsDir(),
163		RebaseName: rebaseName,
164	}, nil
165}
166
167// CopyInfoDestinationPath stats the given path to create a CopyInfo
168// struct representing that resource for the destination of an archive copy
169// operation. The given path should be an absolute local path.
170func CopyInfoDestinationPath(path string) (info CopyInfo, err error) {
171	maxSymlinkIter := 10 // filepath.EvalSymlinks uses 255, but 10 already seems like a lot.
172	path = normalizePath(path)
173	originalPath := path
174
175	stat, err := os.Lstat(path)
176
177	if err == nil && stat.Mode()&os.ModeSymlink == 0 {
178		// The path exists and is not a symlink.
179		return CopyInfo{
180			Path:   path,
181			Exists: true,
182			IsDir:  stat.IsDir(),
183		}, nil
184	}
185
186	// While the path is a symlink.
187	for n := 0; err == nil && stat.Mode()&os.ModeSymlink != 0; n++ {
188		if n > maxSymlinkIter {
189			// Don't follow symlinks more than this arbitrary number of times.
190			return CopyInfo{}, errors.New("too many symlinks in " + originalPath)
191		}
192
193		// The path is a symbolic link. We need to evaluate it so that the
194		// destination of the copy operation is the link target and not the
195		// link itself. This is notably different than CopyInfoSourcePath which
196		// only evaluates symlinks before the last appearing path separator.
197		// Also note that it is okay if the last path element is a broken
198		// symlink as the copy operation should create the target.
199		var linkTarget string
200
201		linkTarget, err = os.Readlink(path)
202		if err != nil {
203			return CopyInfo{}, err
204		}
205
206		if !system.IsAbs(linkTarget) {
207			// Join with the parent directory.
208			dstParent, _ := SplitPathDirEntry(path)
209			linkTarget = filepath.Join(dstParent, linkTarget)
210		}
211
212		path = linkTarget
213		stat, err = os.Lstat(path)
214	}
215
216	if err != nil {
217		// It's okay if the destination path doesn't exist. We can still
218		// continue the copy operation if the parent directory exists.
219		if !os.IsNotExist(err) {
220			return CopyInfo{}, err
221		}
222
223		// Ensure destination parent dir exists.
224		dstParent, _ := SplitPathDirEntry(path)
225
226		parentDirStat, err := os.Stat(dstParent)
227		if err != nil {
228			return CopyInfo{}, err
229		}
230		if !parentDirStat.IsDir() {
231			return CopyInfo{}, ErrNotDirectory
232		}
233
234		return CopyInfo{Path: path}, nil
235	}
236
237	// The path exists after resolving symlinks.
238	return CopyInfo{
239		Path:   path,
240		Exists: true,
241		IsDir:  stat.IsDir(),
242	}, nil
243}
244
245// PrepareArchiveCopy prepares the given srcContent archive, which should
246// contain the archived resource described by srcInfo, to the destination
247// described by dstInfo. Returns the possibly modified content archive along
248// with the path to the destination directory which it should be extracted to.
249func PrepareArchiveCopy(srcContent io.Reader, srcInfo, dstInfo CopyInfo) (dstDir string, content io.ReadCloser, err error) {
250	// Ensure in platform semantics
251	srcInfo.Path = normalizePath(srcInfo.Path)
252	dstInfo.Path = normalizePath(dstInfo.Path)
253
254	// Separate the destination path between its directory and base
255	// components in case the source archive contents need to be rebased.
256	dstDir, dstBase := SplitPathDirEntry(dstInfo.Path)
257	_, srcBase := SplitPathDirEntry(srcInfo.Path)
258
259	switch {
260	case dstInfo.Exists && dstInfo.IsDir:
261		// The destination exists as a directory. No alteration
262		// to srcContent is needed as its contents can be
263		// simply extracted to the destination directory.
264		return dstInfo.Path, ioutil.NopCloser(srcContent), nil
265	case dstInfo.Exists && srcInfo.IsDir:
266		// The destination exists as some type of file and the source
267		// content is a directory. This is an error condition since
268		// you cannot copy a directory to an existing file location.
269		return "", nil, ErrCannotCopyDir
270	case dstInfo.Exists:
271		// The destination exists as some type of file and the source content
272		// is also a file. The source content entry will have to be renamed to
273		// have a basename which matches the destination path's basename.
274		if len(srcInfo.RebaseName) != 0 {
275			srcBase = srcInfo.RebaseName
276		}
277		return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
278	case srcInfo.IsDir:
279		// The destination does not exist and the source content is an archive
280		// of a directory. The archive should be extracted to the parent of
281		// the destination path instead, and when it is, the directory that is
282		// created as a result should take the name of the destination path.
283		// The source content entries will have to be renamed to have a
284		// basename which matches the destination path's basename.
285		if len(srcInfo.RebaseName) != 0 {
286			srcBase = srcInfo.RebaseName
287		}
288		return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
289	case assertsDirectory(dstInfo.Path, os.PathSeparator):
290		// The destination does not exist and is asserted to be created as a
291		// directory, but the source content is not a directory. This is an
292		// error condition since you cannot create a directory from a file
293		// source.
294		return "", nil, ErrDirNotExists
295	default:
296		// The last remaining case is when the destination does not exist, is
297		// not asserted to be a directory, and the source content is not an
298		// archive of a directory. It this case, the destination file will need
299		// to be created when the archive is extracted and the source content
300		// entry will have to be renamed to have a basename which matches the
301		// destination path's basename.
302		if len(srcInfo.RebaseName) != 0 {
303			srcBase = srcInfo.RebaseName
304		}
305		return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
306	}
307
308}
309
// RebaseArchiveEntries rewrites the given srcContent archive, replacing the
// first occurrence of oldBase with newBase in every entry name (and in the
// link name of hard link entries). If oldBase is the path separator (the root
// directory), newBase is prepended to the names instead.
//
// The rewrite runs in a separate goroutine that streams into the returned
// pipe reader. Any error while reading srcContent, writing the rebased
// archive, or finishing it is delivered to the reader of the returned
// io.ReadCloser; on success the reader sees io.EOF. The caller has to either
// drain or close the returned reader so that the goroutine can finish.
func RebaseArchiveEntries(srcContent io.Reader, oldBase, newBase string) io.ReadCloser {
	if oldBase == string(os.PathSeparator) {
		// If oldBase specifies the root directory, use an empty string as
		// oldBase instead so that newBase doesn't replace the path separator
		// that all paths will start with.
		oldBase = ""
	}

	rebased, w := io.Pipe()

	// rebase copies every entry of srcContent to w under its new name and
	// returns the first error encountered, including a failure to finish
	// the output archive.
	rebase := func() error {
		srcTar := tar.NewReader(srcContent)
		rebasedTar := tar.NewWriter(w)

		for {
			hdr, err := srcTar.Next()
			if err == io.EOF {
				// Signals end of archive; Close writes the tar footer.
				return rebasedTar.Close()
			}
			if err != nil {
				return err
			}

			// srcContent tar stream, as served by TarWithOptions(), is
			// definitely in PAX format, but tar.Next() mistakenly guesses it
			// as USTAR, which creates a problem: if the newBase is >100
			// characters long, WriteHeader() returns an error like
			// "archive/tar: cannot encode header: Format specifies USTAR; and USTAR cannot encode Name=...".
			//
			// To fix, set the format to PAX here. See docker/for-linux issue #484.
			hdr.Format = tar.FormatPAX
			hdr.Name = strings.Replace(hdr.Name, oldBase, newBase, 1)
			if hdr.Typeflag == tar.TypeLink {
				hdr.Linkname = strings.Replace(hdr.Linkname, oldBase, newBase, 1)
			}

			if err := rebasedTar.WriteHeader(hdr); err != nil {
				return err
			}
			if _, err := io.Copy(rebasedTar, srcTar); err != nil {
				return err
			}
		}
	}

	go func() {
		// CloseWithError(nil) is equivalent to Close: the reader gets io.EOF.
		w.CloseWithError(rebase())
	}()

	return rebased
}
366
367// TODO @gupta-ak. These might have to be changed in the future to be
368// continuity driver aware as well to support LCOW.
369
370// CopyResource performs an archive copy from the given source path to the
371// given destination path. The source path MUST exist and the destination
372// path's parent directory must exist.
373func CopyResource(srcPath, dstPath string, followLink bool) error {
374	var (
375		srcInfo CopyInfo
376		err     error
377	)
378
379	// Ensure in platform semantics
380	srcPath = normalizePath(srcPath)
381	dstPath = normalizePath(dstPath)
382
383	// Clean the source and destination paths.
384	srcPath = PreserveTrailingDotOrSeparator(filepath.Clean(srcPath), srcPath, os.PathSeparator)
385	dstPath = PreserveTrailingDotOrSeparator(filepath.Clean(dstPath), dstPath, os.PathSeparator)
386
387	if srcInfo, err = CopyInfoSourcePath(srcPath, followLink); err != nil {
388		return err
389	}
390
391	content, err := TarResource(srcInfo)
392	if err != nil {
393		return err
394	}
395	defer content.Close()
396
397	return CopyTo(content, srcInfo, dstPath)
398}
399
400// CopyTo handles extracting the given content whose
401// entries should be sourced from srcInfo to dstPath.
402func CopyTo(content io.Reader, srcInfo CopyInfo, dstPath string) error {
403	// The destination path need not exist, but CopyInfoDestinationPath will
404	// ensure that at least the parent directory exists.
405	dstInfo, err := CopyInfoDestinationPath(normalizePath(dstPath))
406	if err != nil {
407		return err
408	}
409
410	dstDir, copyArchive, err := PrepareArchiveCopy(content, srcInfo, dstInfo)
411	if err != nil {
412		return err
413	}
414	defer copyArchive.Close()
415
416	options := &TarOptions{
417		NoLchown:             true,
418		NoOverwriteDirNonDir: true,
419	}
420
421	return Untar(copyArchive, dstDir, options)
422}
423
424// ResolveHostSourcePath decides real path need to be copied with parameters such as
425// whether to follow symbol link or not, if followLink is true, resolvedPath will return
426// link target of any symbol link file, else it will only resolve symlink of directory
427// but return symbol link file itself without resolving.
428func ResolveHostSourcePath(path string, followLink bool) (resolvedPath, rebaseName string, err error) {
429	if followLink {
430		resolvedPath, err = filepath.EvalSymlinks(path)
431		if err != nil {
432			return
433		}
434
435		resolvedPath, rebaseName = GetRebaseName(path, resolvedPath)
436	} else {
437		dirPath, basePath := filepath.Split(path)
438
439		// if not follow symbol link, then resolve symbol link of parent dir
440		var resolvedDirPath string
441		resolvedDirPath, err = filepath.EvalSymlinks(dirPath)
442		if err != nil {
443			return
444		}
445		// resolvedDirPath will have been cleaned (no trailing path separators) so
446		// we can manually join it with the base path element.
447		resolvedPath = resolvedDirPath + string(filepath.Separator) + basePath
448		if hasTrailingPathSeparator(path, os.PathSeparator) &&
449			filepath.Base(path) != filepath.Base(resolvedPath) {
450			rebaseName = filepath.Base(path)
451		}
452	}
453	return resolvedPath, rebaseName, nil
454}
455
456// GetRebaseName normalizes and compares path and resolvedPath,
457// return completed resolved path and rebased file name
458func GetRebaseName(path, resolvedPath string) (string, string) {
459	// linkTarget will have been cleaned (no trailing path separators and dot) so
460	// we can manually join it with them
461	var rebaseName string
462	if specifiesCurrentDir(path) &&
463		!specifiesCurrentDir(resolvedPath) {
464		resolvedPath += string(filepath.Separator) + "."
465	}
466
467	if hasTrailingPathSeparator(path, os.PathSeparator) &&
468		!hasTrailingPathSeparator(resolvedPath, os.PathSeparator) {
469		resolvedPath += string(filepath.Separator)
470	}
471
472	if filepath.Base(path) != filepath.Base(resolvedPath) {
473		// In the case where the path had a trailing separator and a symlink
474		// evaluation has changed the last path component, we will need to
475		// rebase the name in the archive that is being copied to match the
476		// originally requested name.
477		rebaseName = filepath.Base(path)
478	}
479	return resolvedPath, rebaseName
480}
481