1package archive // import "github.com/docker/docker/pkg/archive"
2
3import (
4	"archive/tar"
5	"bytes"
6	"fmt"
7	"io"
8	"io/ioutil"
9	"os"
10	"path/filepath"
11	"sort"
12	"strings"
13	"syscall"
14	"time"
15
16	"github.com/docker/docker/pkg/idtools"
17	"github.com/docker/docker/pkg/pools"
18	"github.com/docker/docker/pkg/system"
19	"github.com/sirupsen/logrus"
20)
21
// ChangeType represents the change type.
// It is an integer enumeration; the values in use are the ChangeModify,
// ChangeAdd and ChangeDelete constants, which the String method renders as
// "C", "A" and "D" respectively.
type ChangeType int

const (
	// ChangeModify represents the modify operation.
	// It is the first iota value (0) and therefore also the zero value of a
	// ChangeType.
	ChangeModify = iota
	// ChangeAdd represents the add operation.
	ChangeAdd
	// ChangeDelete represents the delete operation.
	ChangeDelete
)
33
34func (c ChangeType) String() string {
35	switch c {
36	case ChangeModify:
37		return "C"
38	case ChangeAdd:
39		return "A"
40	case ChangeDelete:
41		return "D"
42	}
43	return ""
44}
45
// Change represents a single change; it wraps the change type and the path
// the change applies to. It describes how a file differs with respect to the
// parent layers: the file was modified, added or deleted.
// This is used for layer diff.
type Change struct {
	// Path is the changed path. The code in this file builds it with
	// filepath.Join, rooted at the OS path separator.
	Path string
	// Kind tells whether Path was modified, added or deleted.
	Kind ChangeType
}
54
55func (change *Change) String() string {
56	return fmt.Sprintf("%s %s", change.Kind, change.Path)
57}
58
59// for sort.Sort
60type changesByPath []Change
61
62func (c changesByPath) Less(i, j int) bool { return c[i].Path < c[j].Path }
63func (c changesByPath) Len() int           { return len(c) }
64func (c changesByPath) Swap(i, j int)      { c[j], c[i] = c[i], c[j] }
65
// sameFsTime reports whether two file timestamps should be treated as equal.
//
// Gnu tar doesn't have sub-second mtime precision. The go tar writer (1.10+)
// does when using PAX format, but we round times to seconds to ensure
// archives have the same hashes for backwards compatibility.
// See https://github.com/moby/moby/pull/35739/commits/fb170206ba12752214630b269a40ac7be6115ed4.
//
// Losing the sub-second part is problematic when changes are applied via tar
// files, so two times match when they are exactly equal, *or* when they fall
// in the same second and at least one of them has exactly 0 nanoseconds.
func sameFsTime(a, b time.Time) bool {
	if a.Equal(b) {
		return true
	}
	if a.Unix() != b.Unix() {
		return false
	}
	// Same second: accept if either side carries no sub-second part.
	return a.Nanosecond() == 0 || b.Nanosecond() == 0
}
79
// sameFsTimeSpec reports whether two syscall.Timespec values should be
// treated as equal: the seconds must match, and the nanoseconds must either
// match or be zero on at least one side.
func sameFsTimeSpec(a, b syscall.Timespec) bool {
	if a.Sec != b.Sec {
		return false
	}
	if a.Nsec == b.Nsec {
		return true
	}
	return a.Nsec == 0 || b.Nsec == 0
}
84
85// Changes walks the path rw and determines changes for the files in the path,
86// with respect to the parent layers
87func Changes(layers []string, rw string) ([]Change, error) {
88	return changes(layers, rw, aufsDeletedFile, aufsMetadataSkip)
89}
90
91func aufsMetadataSkip(path string) (skip bool, err error) {
92	skip, err = filepath.Match(string(os.PathSeparator)+WhiteoutMetaPrefix+"*", path)
93	if err != nil {
94		skip = true
95	}
96	return
97}
98
99func aufsDeletedFile(root, path string, fi os.FileInfo) (string, error) {
100	f := filepath.Base(path)
101
102	// If there is a whiteout, then the file was removed
103	if strings.HasPrefix(f, WhiteoutPrefix) {
104		originalFile := f[len(WhiteoutPrefix):]
105		return filepath.Join(filepath.Dir(path), originalFile), nil
106	}
107
108	return "", nil
109}
110
// skipChange decides whether changes should ignore an entry. It is called
// with the entry's path rebased onto the OS path separator; when it returns
// true the entry is not recorded and the accompanying error is what the walk
// callback returns.
type skipChange func(string) (bool, error)

// deleteChange decides whether an entry marks a deletion. changes calls it
// with the rw root, the rebased entry path and the entry's os.FileInfo; a
// non-empty result is recorded as the path of the deleted file, an empty
// result means the entry is not a deletion marker.
type deleteChange func(string, string, os.FileInfo) (string, error)
113
114func changes(layers []string, rw string, dc deleteChange, sc skipChange) ([]Change, error) {
115	var (
116		changes     []Change
117		changedDirs = make(map[string]struct{})
118	)
119
120	err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error {
121		if err != nil {
122			return err
123		}
124
125		// Rebase path
126		path, err = filepath.Rel(rw, path)
127		if err != nil {
128			return err
129		}
130
131		// As this runs on the daemon side, file paths are OS specific.
132		path = filepath.Join(string(os.PathSeparator), path)
133
134		// Skip root
135		if path == string(os.PathSeparator) {
136			return nil
137		}
138
139		if sc != nil {
140			if skip, err := sc(path); skip {
141				return err
142			}
143		}
144
145		change := Change{
146			Path: path,
147		}
148
149		deletedFile, err := dc(rw, path, f)
150		if err != nil {
151			return err
152		}
153
154		// Find out what kind of modification happened
155		if deletedFile != "" {
156			change.Path = deletedFile
157			change.Kind = ChangeDelete
158		} else {
159			// Otherwise, the file was added
160			change.Kind = ChangeAdd
161
162			// ...Unless it already existed in a top layer, in which case, it's a modification
163			for _, layer := range layers {
164				stat, err := os.Stat(filepath.Join(layer, path))
165				if err != nil && !os.IsNotExist(err) {
166					return err
167				}
168				if err == nil {
169					// The file existed in the top layer, so that's a modification
170
171					// However, if it's a directory, maybe it wasn't actually modified.
172					// If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar
173					if stat.IsDir() && f.IsDir() {
174						if f.Size() == stat.Size() && f.Mode() == stat.Mode() && sameFsTime(f.ModTime(), stat.ModTime()) {
175							// Both directories are the same, don't record the change
176							return nil
177						}
178					}
179					change.Kind = ChangeModify
180					break
181				}
182			}
183		}
184
185		// If /foo/bar/file.txt is modified, then /foo/bar must be part of the changed files.
186		// This block is here to ensure the change is recorded even if the
187		// modify time, mode and size of the parent directory in the rw and ro layers are all equal.
188		// Check https://github.com/docker/docker/pull/13590 for details.
189		if f.IsDir() {
190			changedDirs[path] = struct{}{}
191		}
192		if change.Kind == ChangeAdd || change.Kind == ChangeDelete {
193			parent := filepath.Dir(path)
194			if _, ok := changedDirs[parent]; !ok && parent != "/" {
195				changes = append(changes, Change{Path: parent, Kind: ChangeModify})
196				changedDirs[parent] = struct{}{}
197			}
198		}
199
200		// Record change
201		changes = append(changes, change)
202		return nil
203	})
204	if err != nil && !os.IsNotExist(err) {
205		return nil, err
206	}
207	return changes, nil
208}
209
// FileInfo describes the information of a file.
// Instances form a tree: every node points at its parent and holds its
// children keyed by name.
type FileInfo struct {
	parent     *FileInfo            // containing entry; nil at the root
	name       string               // path element joined onto the parent's path
	stat       *system.StatT        // stat data compared by addChanges to detect modification
	children   map[string]*FileInfo // child entries keyed by name
	capability []byte               // capability bytes; a difference counts as a modification
	added      bool                 // set once an add or modify change was recorded for this entry
}
219
220// LookUp looks up the file information of a file.
221func (info *FileInfo) LookUp(path string) *FileInfo {
222	// As this runs on the daemon side, file paths are OS specific.
223	parent := info
224	if path == string(os.PathSeparator) {
225		return info
226	}
227
228	pathElements := strings.Split(path, string(os.PathSeparator))
229	for _, elem := range pathElements {
230		if elem != "" {
231			child := parent.children[elem]
232			if child == nil {
233				return nil
234			}
235			parent = child
236		}
237	}
238	return parent
239}
240
241func (info *FileInfo) path() string {
242	if info.parent == nil {
243		// As this runs on the daemon side, file paths are OS specific.
244		return string(os.PathSeparator)
245	}
246	return filepath.Join(info.parent.path(), info.name)
247}
248
// addChanges appends to changes every difference between the tree rooted at
// info (the new state) and the tree rooted at oldInfo (the old state).
//
// A nil oldInfo means info has no old counterpart: info is recorded as an
// addition and, because no old children are considered, so is everything
// below it. Otherwise each child of info is compared with the old child of
// the same name: a differing stat or capability records a modification, and
// old children with no new counterpart are recorded as deletions.
//
// Children are visited in map iteration order, so the relative order of
// changes for siblings is not fixed. A directory that was not itself recorded
// but contains recorded changes is inserted as a modification just before the
// changes recorded beneath it; the root is exempt.
func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) {

	// Remember where this call started so that we can tell afterwards whether
	// anything was recorded for info or its descendants.
	sizeAtEntry := len(*changes)

	if oldInfo == nil {
		// add
		change := Change{
			Path: info.path(),
			Kind: ChangeAdd,
		}
		*changes = append(*changes, change)
		info.added = true
	}

	// We make a copy so we can modify it to detect additions
	// also, we only recurse on the old dir if the new info is a directory
	// otherwise any previous delete/change is considered recursive
	oldChildren := make(map[string]*FileInfo)
	if oldInfo != nil && info.isDir() {
		for k, v := range oldInfo.children {
			oldChildren[k] = v
		}
	}

	for name, newChild := range info.children {
		// A missing map entry yields nil, which the recursive call below
		// treats as "did not exist before".
		oldChild := oldChildren[name]
		if oldChild != nil {
			// change?
			oldStat := oldChild.stat
			newStat := newChild.stat
			// Note: We can't compare inode or ctime or blocksize here, because these change
			// when copying a file into a container. However, that is not generally a problem
			// because any content change will change mtime, and any status change should
			// be visible when actually comparing the stat fields. The only time this
			// breaks down is if some code intentionally hides a change by setting
			// back mtime
			if statDifferent(oldStat, newStat) ||
				!bytes.Equal(oldChild.capability, newChild.capability) {
				change := Change{
					Path: newChild.path(),
					Kind: ChangeModify,
				}
				*changes = append(*changes, change)
				// Marks the child as already recorded so that its own call
				// does not insert a second entry for it.
				newChild.added = true
			}

			// Remove from copy so we can detect deletions
			delete(oldChildren, name)
		}

		newChild.addChanges(oldChild, changes)
	}
	// Whatever is left in the copy existed before but has no new counterpart.
	for _, oldChild := range oldChildren {
		// delete
		change := Change{
			Path: oldChild.path(),
			Kind: ChangeDelete,
		}
		*changes = append(*changes, change)
	}

	// If there were changes inside this directory, we need to add it, even if the directory
	// itself wasn't changed. This is needed to properly save and restore filesystem permissions.
	// As this runs on the daemon side, file paths are OS specific.
	if len(*changes) > sizeAtEntry && info.isDir() && !info.added && info.path() != string(os.PathSeparator) {
		change := Change{
			Path: info.path(),
			Kind: ChangeModify,
		}
		// Let's insert the directory entry before the recently added entries located inside this dir
		*changes = append(*changes, change) // just to resize the slice, will be overwritten
		// Shift everything recorded during this call one slot to the right...
		copy((*changes)[sizeAtEntry+1:], (*changes)[sizeAtEntry:])
		// ...and put the directory entry into the freed slot.
		(*changes)[sizeAtEntry] = change
	}

}
325
326// Changes add changes to file information.
327func (info *FileInfo) Changes(oldInfo *FileInfo) []Change {
328	var changes []Change
329
330	info.addChanges(oldInfo, &changes)
331
332	return changes
333}
334
335func newRootFileInfo() *FileInfo {
336	// As this runs on the daemon side, file paths are OS specific.
337	root := &FileInfo{
338		name:     string(os.PathSeparator),
339		children: make(map[string]*FileInfo),
340	}
341	return root
342}
343
344// ChangesDirs compares two directories and generates an array of Change objects describing the changes.
345// If oldDir is "", then all files in newDir will be Add-Changes.
346func ChangesDirs(newDir, oldDir string) ([]Change, error) {
347	var (
348		oldRoot, newRoot *FileInfo
349	)
350	if oldDir == "" {
351		emptyDir, err := ioutil.TempDir("", "empty")
352		if err != nil {
353			return nil, err
354		}
355		defer os.Remove(emptyDir)
356		oldDir = emptyDir
357	}
358	oldRoot, newRoot, err := collectFileInfoForChanges(oldDir, newDir)
359	if err != nil {
360		return nil, err
361	}
362
363	return newRoot.Changes(oldRoot), nil
364}
365
366// ChangesSize calculates the size in bytes of the provided changes, based on newDir.
367func ChangesSize(newDir string, changes []Change) int64 {
368	var (
369		size int64
370		sf   = make(map[uint64]struct{})
371	)
372	for _, change := range changes {
373		if change.Kind == ChangeModify || change.Kind == ChangeAdd {
374			file := filepath.Join(newDir, change.Path)
375			fileInfo, err := os.Lstat(file)
376			if err != nil {
377				logrus.Errorf("Can not stat %q: %s", file, err)
378				continue
379			}
380
381			if fileInfo != nil && !fileInfo.IsDir() {
382				if hasHardlinks(fileInfo) {
383					inode := getIno(fileInfo)
384					if _, ok := sf[inode]; !ok {
385						size += fileInfo.Size()
386						sf[inode] = struct{}{}
387					}
388				} else {
389					size += fileInfo.Size()
390				}
391			}
392		}
393	}
394	return size
395}
396
397// ExportChanges produces an Archive from the provided changes, relative to dir.
398func ExportChanges(dir string, changes []Change, uidMaps, gidMaps []idtools.IDMap) (io.ReadCloser, error) {
399	reader, writer := io.Pipe()
400	go func() {
401		ta := newTarAppender(idtools.NewIDMappingsFromMaps(uidMaps, gidMaps), writer, nil)
402
403		// this buffer is needed for the duration of this piped stream
404		defer pools.BufioWriter32KPool.Put(ta.Buffer)
405
406		sort.Sort(changesByPath(changes))
407
408		// In general we log errors here but ignore them because
409		// during e.g. a diff operation the container can continue
410		// mutating the filesystem and we can see transient errors
411		// from this
412		for _, change := range changes {
413			if change.Kind == ChangeDelete {
414				whiteOutDir := filepath.Dir(change.Path)
415				whiteOutBase := filepath.Base(change.Path)
416				whiteOut := filepath.Join(whiteOutDir, WhiteoutPrefix+whiteOutBase)
417				timestamp := time.Now()
418				hdr := &tar.Header{
419					Name:       whiteOut[1:],
420					Size:       0,
421					ModTime:    timestamp,
422					AccessTime: timestamp,
423					ChangeTime: timestamp,
424				}
425				if err := ta.TarWriter.WriteHeader(hdr); err != nil {
426					logrus.Debugf("Can't write whiteout header: %s", err)
427				}
428			} else {
429				path := filepath.Join(dir, change.Path)
430				if err := ta.addTarFile(path, change.Path[1:]); err != nil {
431					logrus.Debugf("Can't add file %s to tar: %s", path, err)
432				}
433			}
434		}
435
436		// Make sure to check the error on Close.
437		if err := ta.TarWriter.Close(); err != nil {
438			logrus.Debugf("Can't close layer: %s", err)
439		}
440		if err := writer.Close(); err != nil {
441			logrus.Debugf("failed close Changes writer: %s", err)
442		}
443	}()
444	return reader, nil
445}
446