1package archive // import "github.com/docker/docker/pkg/archive"
2
3import (
4	"archive/tar"
5	"errors"
6	"io"
7	"io/ioutil"
8	"os"
9	"path/filepath"
10	"strings"
11
12	"github.com/docker/docker/pkg/system"
13	"github.com/sirupsen/logrus"
14)
15
// Sentinel errors used or returned by the copy helpers in this file.
var (
	// ErrNotDirectory is returned by CopyInfoDestinationPath when the parent
	// of a non-existent destination exists but is not a directory.
	ErrNotDirectory = errors.New("not a directory")
	// ErrDirNotExists is returned by PrepareArchiveCopy when the destination
	// is asserted to be a directory, does not exist, and the source is not a
	// directory.
	ErrDirNotExists = errors.New("no such directory")
	// ErrCannotCopyDir is returned by PrepareArchiveCopy when a directory is
	// copied onto an existing destination that is not a directory.
	ErrCannotCopyDir = errors.New("cannot copy directory")
	// ErrInvalidCopySource signals invalid copy source content. None of the
	// functions visible in this file return it; it is declared here for
	// other code to use.
	ErrInvalidCopySource = errors.New("invalid copy source content")
)
23
24// PreserveTrailingDotOrSeparator returns the given cleaned path (after
25// processing using any utility functions from the path or filepath stdlib
26// packages) and appends a trailing `/.` or `/` if its corresponding  original
27// path (from before being processed by utility functions from the path or
28// filepath stdlib packages) ends with a trailing `/.` or `/`. If the cleaned
29// path already ends in a `.` path segment, then another is not added. If the
30// clean path already ends in the separator, then another is not added.
31func PreserveTrailingDotOrSeparator(cleanedPath string, originalPath string, sep byte) string {
32	// Ensure paths are in platform semantics
33	cleanedPath = strings.Replace(cleanedPath, "/", string(sep), -1)
34	originalPath = strings.Replace(originalPath, "/", string(sep), -1)
35
36	if !specifiesCurrentDir(cleanedPath) && specifiesCurrentDir(originalPath) {
37		if !hasTrailingPathSeparator(cleanedPath, sep) {
38			// Add a separator if it doesn't already end with one (a cleaned
39			// path would only end in a separator if it is the root).
40			cleanedPath += string(sep)
41		}
42		cleanedPath += "."
43	}
44
45	if !hasTrailingPathSeparator(cleanedPath, sep) && hasTrailingPathSeparator(originalPath, sep) {
46		cleanedPath += string(sep)
47	}
48
49	return cleanedPath
50}
51
52// assertsDirectory returns whether the given path is
53// asserted to be a directory, i.e., the path ends with
54// a trailing '/' or `/.`, assuming a path separator of `/`.
55func assertsDirectory(path string, sep byte) bool {
56	return hasTrailingPathSeparator(path, sep) || specifiesCurrentDir(path)
57}
58
// hasTrailingPathSeparator reports whether the last byte of path is the
// separator sep. An empty path never has a trailing separator.
func hasTrailingPathSeparator(path string, sep byte) bool {
	if path == "" {
		return false
	}
	last := path[len(path)-1]
	return last == sep
}
64
// specifiesCurrentDir reports whether the given path specifies a "current
// directory", i.e. whether its last path segment as computed by
// filepath.Base is `.`. Because filepath.Base returns "." for an empty
// string, an empty path is also reported as the current directory.
func specifiesCurrentDir(path string) bool {
	lastSegment := filepath.Base(path)
	return lastSegment == "."
}
70
71// SplitPathDirEntry splits the given path between its directory name and its
72// basename by first cleaning the path but preserves a trailing "." if the
73// original path specified the current directory.
74func SplitPathDirEntry(path string) (dir, base string) {
75	cleanedPath := filepath.Clean(filepath.FromSlash(path))
76
77	if specifiesCurrentDir(path) {
78		cleanedPath += string(os.PathSeparator) + "."
79	}
80
81	return filepath.Dir(cleanedPath), filepath.Base(cleanedPath)
82}
83
84// TarResource archives the resource described by the given CopyInfo to a Tar
85// archive. A non-nil error is returned if sourcePath does not exist or is
86// asserted to be a directory but exists as another type of file.
87//
88// This function acts as a convenient wrapper around TarWithOptions, which
89// requires a directory as the source path. TarResource accepts either a
90// directory or a file path and correctly sets the Tar options.
91func TarResource(sourceInfo CopyInfo) (content io.ReadCloser, err error) {
92	return TarResourceRebase(sourceInfo.Path, sourceInfo.RebaseName)
93}
94
95// TarResourceRebase is like TarResource but renames the first path element of
96// items in the resulting tar archive to match the given rebaseName if not "".
97func TarResourceRebase(sourcePath, rebaseName string) (content io.ReadCloser, err error) {
98	sourcePath = normalizePath(sourcePath)
99	if _, err = os.Lstat(sourcePath); err != nil {
100		// Catches the case where the source does not exist or is not a
101		// directory if asserted to be a directory, as this also causes an
102		// error.
103		return
104	}
105
106	// Separate the source path between its directory and
107	// the entry in that directory which we are archiving.
108	sourceDir, sourceBase := SplitPathDirEntry(sourcePath)
109	opts := TarResourceRebaseOpts(sourceBase, rebaseName)
110
111	logrus.Debugf("copying %q from %q", sourceBase, sourceDir)
112	return TarWithOptions(sourceDir, opts)
113}
114
115// TarResourceRebaseOpts does not preform the Tar, but instead just creates the rebase
116// parameters to be sent to TarWithOptions (the TarOptions struct)
117func TarResourceRebaseOpts(sourceBase string, rebaseName string) *TarOptions {
118	filter := []string{sourceBase}
119	return &TarOptions{
120		Compression:      Uncompressed,
121		IncludeFiles:     filter,
122		IncludeSourceDir: true,
123		RebaseNames: map[string]string{
124			sourceBase: rebaseName,
125		},
126	}
127}
128
// CopyInfo holds basic info about the source
// or destination path of a copy operation.
type CopyInfo struct {
	// Path is the path of the resource; the CopyInfo* constructors in this
	// file store the path after symlink resolution here.
	Path string
	// Exists reports whether the resource at Path exists.
	Exists bool
	// IsDir reports whether the resource at Path is a directory.
	IsDir bool
	// RebaseName, if not empty, is the name that the first path element of
	// the archived entries should be renamed to.
	RebaseName string
}
137
138// CopyInfoSourcePath stats the given path to create a CopyInfo
139// struct representing that resource for the source of an archive copy
140// operation. The given path should be an absolute local path. A source path
141// has all symlinks evaluated that appear before the last path separator ("/"
142// on Unix). As it is to be a copy source, the path must exist.
143func CopyInfoSourcePath(path string, followLink bool) (CopyInfo, error) {
144	// normalize the file path and then evaluate the symbol link
145	// we will use the target file instead of the symbol link if
146	// followLink is set
147	path = normalizePath(path)
148
149	resolvedPath, rebaseName, err := ResolveHostSourcePath(path, followLink)
150	if err != nil {
151		return CopyInfo{}, err
152	}
153
154	stat, err := os.Lstat(resolvedPath)
155	if err != nil {
156		return CopyInfo{}, err
157	}
158
159	return CopyInfo{
160		Path:       resolvedPath,
161		Exists:     true,
162		IsDir:      stat.IsDir(),
163		RebaseName: rebaseName,
164	}, nil
165}
166
167// CopyInfoDestinationPath stats the given path to create a CopyInfo
168// struct representing that resource for the destination of an archive copy
169// operation. The given path should be an absolute local path.
170func CopyInfoDestinationPath(path string) (info CopyInfo, err error) {
171	maxSymlinkIter := 10 // filepath.EvalSymlinks uses 255, but 10 already seems like a lot.
172	path = normalizePath(path)
173	originalPath := path
174
175	stat, err := os.Lstat(path)
176
177	if err == nil && stat.Mode()&os.ModeSymlink == 0 {
178		// The path exists and is not a symlink.
179		return CopyInfo{
180			Path:   path,
181			Exists: true,
182			IsDir:  stat.IsDir(),
183		}, nil
184	}
185
186	// While the path is a symlink.
187	for n := 0; err == nil && stat.Mode()&os.ModeSymlink != 0; n++ {
188		if n > maxSymlinkIter {
189			// Don't follow symlinks more than this arbitrary number of times.
190			return CopyInfo{}, errors.New("too many symlinks in " + originalPath)
191		}
192
193		// The path is a symbolic link. We need to evaluate it so that the
194		// destination of the copy operation is the link target and not the
195		// link itself. This is notably different than CopyInfoSourcePath which
196		// only evaluates symlinks before the last appearing path separator.
197		// Also note that it is okay if the last path element is a broken
198		// symlink as the copy operation should create the target.
199		var linkTarget string
200
201		linkTarget, err = os.Readlink(path)
202		if err != nil {
203			return CopyInfo{}, err
204		}
205
206		if !system.IsAbs(linkTarget) {
207			// Join with the parent directory.
208			dstParent, _ := SplitPathDirEntry(path)
209			linkTarget = filepath.Join(dstParent, linkTarget)
210		}
211
212		path = linkTarget
213		stat, err = os.Lstat(path)
214	}
215
216	if err != nil {
217		// It's okay if the destination path doesn't exist. We can still
218		// continue the copy operation if the parent directory exists.
219		if !os.IsNotExist(err) {
220			return CopyInfo{}, err
221		}
222
223		// Ensure destination parent dir exists.
224		dstParent, _ := SplitPathDirEntry(path)
225
226		parentDirStat, err := os.Stat(dstParent)
227		if err != nil {
228			return CopyInfo{}, err
229		}
230		if !parentDirStat.IsDir() {
231			return CopyInfo{}, ErrNotDirectory
232		}
233
234		return CopyInfo{Path: path}, nil
235	}
236
237	// The path exists after resolving symlinks.
238	return CopyInfo{
239		Path:   path,
240		Exists: true,
241		IsDir:  stat.IsDir(),
242	}, nil
243}
244
245// PrepareArchiveCopy prepares the given srcContent archive, which should
246// contain the archived resource described by srcInfo, to the destination
247// described by dstInfo. Returns the possibly modified content archive along
248// with the path to the destination directory which it should be extracted to.
249func PrepareArchiveCopy(srcContent io.Reader, srcInfo, dstInfo CopyInfo) (dstDir string, content io.ReadCloser, err error) {
250	// Ensure in platform semantics
251	srcInfo.Path = normalizePath(srcInfo.Path)
252	dstInfo.Path = normalizePath(dstInfo.Path)
253
254	// Separate the destination path between its directory and base
255	// components in case the source archive contents need to be rebased.
256	dstDir, dstBase := SplitPathDirEntry(dstInfo.Path)
257	_, srcBase := SplitPathDirEntry(srcInfo.Path)
258
259	switch {
260	case dstInfo.Exists && dstInfo.IsDir:
261		// The destination exists as a directory. No alteration
262		// to srcContent is needed as its contents can be
263		// simply extracted to the destination directory.
264		return dstInfo.Path, ioutil.NopCloser(srcContent), nil
265	case dstInfo.Exists && srcInfo.IsDir:
266		// The destination exists as some type of file and the source
267		// content is a directory. This is an error condition since
268		// you cannot copy a directory to an existing file location.
269		return "", nil, ErrCannotCopyDir
270	case dstInfo.Exists:
271		// The destination exists as some type of file and the source content
272		// is also a file. The source content entry will have to be renamed to
273		// have a basename which matches the destination path's basename.
274		if len(srcInfo.RebaseName) != 0 {
275			srcBase = srcInfo.RebaseName
276		}
277		return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
278	case srcInfo.IsDir:
279		// The destination does not exist and the source content is an archive
280		// of a directory. The archive should be extracted to the parent of
281		// the destination path instead, and when it is, the directory that is
282		// created as a result should take the name of the destination path.
283		// The source content entries will have to be renamed to have a
284		// basename which matches the destination path's basename.
285		if len(srcInfo.RebaseName) != 0 {
286			srcBase = srcInfo.RebaseName
287		}
288		return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
289	case assertsDirectory(dstInfo.Path, os.PathSeparator):
290		// The destination does not exist and is asserted to be created as a
291		// directory, but the source content is not a directory. This is an
292		// error condition since you cannot create a directory from a file
293		// source.
294		return "", nil, ErrDirNotExists
295	default:
296		// The last remaining case is when the destination does not exist, is
297		// not asserted to be a directory, and the source content is not an
298		// archive of a directory. It this case, the destination file will need
299		// to be created when the archive is extracted and the source content
300		// entry will have to be renamed to have a basename which matches the
301		// destination path's basename.
302		if len(srcInfo.RebaseName) != 0 {
303			srcBase = srcInfo.RebaseName
304		}
305		return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
306	}
307
308}
309
// RebaseArchiveEntries rewrites the given srcContent archive, replacing the
// first occurrence of oldBase (expected at the beginning of entry names)
// with newBase. For hard-link entries the link name is rewritten the same
// way. If oldBase is the path separator (the root directory) it is treated
// as the empty string, so newBase is prepended to every name.
//
// The rewritten archive is produced by a goroutine writing into a pipe; the
// returned ReadCloser is the read side of that pipe. Any error reading
// srcContent or writing the rebased archive, including a failure to write
// the archive footer, is delivered to the reader. The goroutine ends when
// srcContent is exhausted or on the first error.
func RebaseArchiveEntries(srcContent io.Reader, oldBase, newBase string) io.ReadCloser {
	if oldBase == string(os.PathSeparator) {
		// If oldBase specifies the root directory, use an empty string as
		// oldBase instead so that newBase doesn't replace the path separator
		// that all paths will start with.
		oldBase = ""
	}

	rebased, w := io.Pipe()

	go func() {
		srcTar := tar.NewReader(srcContent)
		rebasedTar := tar.NewWriter(w)

		for {
			hdr, err := srcTar.Next()
			if err == io.EOF {
				// Signals end of archive. Closing the tar writer flushes
				// the footer; hand any failure to the reader instead of
				// reporting a clean EOF for a truncated archive.
				// CloseWithError(nil) behaves like Close.
				w.CloseWithError(rebasedTar.Close())
				return
			}
			if err != nil {
				w.CloseWithError(err)
				return
			}

			// The reader records the format it detected in the header. If
			// that is USTAR and the rebased name no longer fits USTAR's
			// limits, WriteHeader refuses to encode it. Ask for PAX, which
			// can encode arbitrary names (see docker/for-linux issue #484).
			hdr.Format = tar.FormatPAX
			hdr.Name = strings.Replace(hdr.Name, oldBase, newBase, 1)
			if hdr.Typeflag == tar.TypeLink {
				hdr.Linkname = strings.Replace(hdr.Linkname, oldBase, newBase, 1)
			}

			if err = rebasedTar.WriteHeader(hdr); err != nil {
				w.CloseWithError(err)
				return
			}

			if _, err = io.Copy(rebasedTar, srcTar); err != nil {
				w.CloseWithError(err)
				return
			}
		}
	}()

	return rebased
}
358
359// TODO @gupta-ak. These might have to be changed in the future to be
360// continuity driver aware as well to support LCOW.
361
362// CopyResource performs an archive copy from the given source path to the
363// given destination path. The source path MUST exist and the destination
364// path's parent directory must exist.
365func CopyResource(srcPath, dstPath string, followLink bool) error {
366	var (
367		srcInfo CopyInfo
368		err     error
369	)
370
371	// Ensure in platform semantics
372	srcPath = normalizePath(srcPath)
373	dstPath = normalizePath(dstPath)
374
375	// Clean the source and destination paths.
376	srcPath = PreserveTrailingDotOrSeparator(filepath.Clean(srcPath), srcPath, os.PathSeparator)
377	dstPath = PreserveTrailingDotOrSeparator(filepath.Clean(dstPath), dstPath, os.PathSeparator)
378
379	if srcInfo, err = CopyInfoSourcePath(srcPath, followLink); err != nil {
380		return err
381	}
382
383	content, err := TarResource(srcInfo)
384	if err != nil {
385		return err
386	}
387	defer content.Close()
388
389	return CopyTo(content, srcInfo, dstPath)
390}
391
392// CopyTo handles extracting the given content whose
393// entries should be sourced from srcInfo to dstPath.
394func CopyTo(content io.Reader, srcInfo CopyInfo, dstPath string) error {
395	// The destination path need not exist, but CopyInfoDestinationPath will
396	// ensure that at least the parent directory exists.
397	dstInfo, err := CopyInfoDestinationPath(normalizePath(dstPath))
398	if err != nil {
399		return err
400	}
401
402	dstDir, copyArchive, err := PrepareArchiveCopy(content, srcInfo, dstInfo)
403	if err != nil {
404		return err
405	}
406	defer copyArchive.Close()
407
408	options := &TarOptions{
409		NoLchown:             true,
410		NoOverwriteDirNonDir: true,
411	}
412
413	return Untar(copyArchive, dstDir, options)
414}
415
416// ResolveHostSourcePath decides real path need to be copied with parameters such as
417// whether to follow symbol link or not, if followLink is true, resolvedPath will return
418// link target of any symbol link file, else it will only resolve symlink of directory
419// but return symbol link file itself without resolving.
420func ResolveHostSourcePath(path string, followLink bool) (resolvedPath, rebaseName string, err error) {
421	if followLink {
422		resolvedPath, err = filepath.EvalSymlinks(path)
423		if err != nil {
424			return
425		}
426
427		resolvedPath, rebaseName = GetRebaseName(path, resolvedPath)
428	} else {
429		dirPath, basePath := filepath.Split(path)
430
431		// if not follow symbol link, then resolve symbol link of parent dir
432		var resolvedDirPath string
433		resolvedDirPath, err = filepath.EvalSymlinks(dirPath)
434		if err != nil {
435			return
436		}
437		// resolvedDirPath will have been cleaned (no trailing path separators) so
438		// we can manually join it with the base path element.
439		resolvedPath = resolvedDirPath + string(filepath.Separator) + basePath
440		if hasTrailingPathSeparator(path, os.PathSeparator) &&
441			filepath.Base(path) != filepath.Base(resolvedPath) {
442			rebaseName = filepath.Base(path)
443		}
444	}
445	return resolvedPath, rebaseName, nil
446}
447
448// GetRebaseName normalizes and compares path and resolvedPath,
449// return completed resolved path and rebased file name
450func GetRebaseName(path, resolvedPath string) (string, string) {
451	// linkTarget will have been cleaned (no trailing path separators and dot) so
452	// we can manually join it with them
453	var rebaseName string
454	if specifiesCurrentDir(path) &&
455		!specifiesCurrentDir(resolvedPath) {
456		resolvedPath += string(filepath.Separator) + "."
457	}
458
459	if hasTrailingPathSeparator(path, os.PathSeparator) &&
460		!hasTrailingPathSeparator(resolvedPath, os.PathSeparator) {
461		resolvedPath += string(filepath.Separator)
462	}
463
464	if filepath.Base(path) != filepath.Base(resolvedPath) {
465		// In the case where the path had a trailing separator and a symlink
466		// evaluation has changed the last path component, we will need to
467		// rebase the name in the archive that is being copied to match the
468		// originally requested name.
469		rebaseName = filepath.Base(path)
470	}
471	return resolvedPath, rebaseName
472}
473