1package archive // import "github.com/ory/dockertest/docker/pkg/archive"
2
3import (
4	"archive/tar"
5	"bytes"
6	"fmt"
7	"io"
8	"io/ioutil"
9	"os"
10	"path/filepath"
11	"sort"
12	"strings"
13	"syscall"
14	"time"
15
16	"github.com/ory/dockertest/docker/pkg/idtools"
17	"github.com/ory/dockertest/docker/pkg/pools"
18	"github.com/ory/dockertest/docker/pkg/system"
19	"github.com/sirupsen/logrus"
20)
21
// ChangeType identifies the kind of change recorded for a path.
type ChangeType int

// The kinds of change that can be recorded for a path.
const (
	// ChangeModify marks a path that was modified.
	ChangeModify = iota
	// ChangeAdd marks a path that was added.
	ChangeAdd
	// ChangeDelete marks a path that was deleted.
	ChangeDelete
)

// String returns the one-letter code of the change type: "C" for a
// modification, "A" for an addition and "D" for a deletion. Any other value
// yields the empty string.
func (c ChangeType) String() string {
	codes := [...]string{
		ChangeModify: "C",
		ChangeAdd:    "A",
		ChangeDelete: "D",
	}
	if c < 0 || int(c) >= len(codes) {
		return ""
	}
	return codes[c]
}
45
46// Change represents a change, it wraps the change type and path.
47// It describes changes of the files in the path respect to the
48// parent layers. The change could be modify, add, delete.
49// This is used for layer diff.
50type Change struct {
51	Path string
52	Kind ChangeType
53}
54
55func (change *Change) String() string {
56	return fmt.Sprintf("%s %s", change.Kind, change.Path)
57}
58
59// for sort.Sort
60type changesByPath []Change
61
62func (c changesByPath) Less(i, j int) bool { return c[i].Path < c[j].Path }
63func (c changesByPath) Len() int           { return len(c) }
64func (c changesByPath) Swap(i, j int)      { c[j], c[i] = c[i], c[j] }
65
// sameFsTime reports whether a and b should be treated as the same file
// modification time.
//
// GNU tar and the Go tar writer don't have sub-second mtime precision, which
// is problematic when changes are applied via tar files. Two times are
// therefore considered the same when they denote the same instant, *or* when
// they have the same second count and either a or b has exactly 0
// nanoseconds.
//
// Instants are compared with Time.Equal rather than ==, because == also
// compares the Location and the monotonic clock reading and can therefore
// report a difference between two values that denote the same instant.
func sameFsTime(a, b time.Time) bool {
	return a.Equal(b) ||
		(a.Unix() == b.Unix() &&
			(a.Nanosecond() == 0 || b.Nanosecond() == 0))
}
75
// sameFsTimeSpec reports whether two syscall.Timespec values should be
// treated as the same file time: the seconds must match, and the nanoseconds
// must either match or be exactly 0 on at least one side (which tolerates a
// timestamp that lost its sub-second part).
func sameFsTimeSpec(a, b syscall.Timespec) bool {
	if a.Sec != b.Sec {
		return false
	}
	return a.Nsec == b.Nsec || a.Nsec == 0 || b.Nsec == 0
}
80
81// Changes walks the path rw and determines changes for the files in the path,
82// with respect to the parent layers
83func Changes(layers []string, rw string) ([]Change, error) {
84	return changes(layers, rw, aufsDeletedFile, aufsMetadataSkip)
85}
86
87func aufsMetadataSkip(path string) (skip bool, err error) {
88	skip, err = filepath.Match(string(os.PathSeparator)+WhiteoutMetaPrefix+"*", path)
89	if err != nil {
90		skip = true
91	}
92	return
93}
94
95func aufsDeletedFile(root, path string, fi os.FileInfo) (string, error) {
96	f := filepath.Base(path)
97
98	// If there is a whiteout, then the file was removed
99	if strings.HasPrefix(f, WhiteoutPrefix) {
100		originalFile := f[len(WhiteoutPrefix):]
101		return filepath.Join(filepath.Dir(path), originalFile), nil
102	}
103
104	return "", nil
105}
106
type (
	// skipChange decides whether the entry at the given path is left out of
	// the list of changes.
	skipChange func(string) (bool, error)

	// deleteChange receives the root being walked, the path of an entry
	// relative to that root and the entry's file info. It returns the path
	// of the file whose deletion the entry represents, or "" when the entry
	// does not represent a deletion.
	deleteChange func(string, string, os.FileInfo) (string, error)
)
109
110func changes(layers []string, rw string, dc deleteChange, sc skipChange) ([]Change, error) {
111	var (
112		changes     []Change
113		changedDirs = make(map[string]struct{})
114	)
115
116	err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error {
117		if err != nil {
118			return err
119		}
120
121		// Rebase path
122		path, err = filepath.Rel(rw, path)
123		if err != nil {
124			return err
125		}
126
127		// As this runs on the daemon side, file paths are OS specific.
128		path = filepath.Join(string(os.PathSeparator), path)
129
130		// Skip root
131		if path == string(os.PathSeparator) {
132			return nil
133		}
134
135		if sc != nil {
136			if skip, err := sc(path); skip {
137				return err
138			}
139		}
140
141		change := Change{
142			Path: path,
143		}
144
145		deletedFile, err := dc(rw, path, f)
146		if err != nil {
147			return err
148		}
149
150		// Find out what kind of modification happened
151		if deletedFile != "" {
152			change.Path = deletedFile
153			change.Kind = ChangeDelete
154		} else {
155			// Otherwise, the file was added
156			change.Kind = ChangeAdd
157
158			// ...Unless it already existed in a top layer, in which case, it's a modification
159			for _, layer := range layers {
160				stat, err := os.Stat(filepath.Join(layer, path))
161				if err != nil && !os.IsNotExist(err) {
162					return err
163				}
164				if err == nil {
165					// The file existed in the top layer, so that's a modification
166
167					// However, if it's a directory, maybe it wasn't actually modified.
168					// If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar
169					if stat.IsDir() && f.IsDir() {
170						if f.Size() == stat.Size() && f.Mode() == stat.Mode() && sameFsTime(f.ModTime(), stat.ModTime()) {
171							// Both directories are the same, don't record the change
172							return nil
173						}
174					}
175					change.Kind = ChangeModify
176					break
177				}
178			}
179		}
180
181		// If /foo/bar/file.txt is modified, then /foo/bar must be part of the changed files.
182		// This block is here to ensure the change is recorded even if the
183		// modify time, mode and size of the parent directory in the rw and ro layers are all equal.
184		// Check https://github.com/docker/docker/pull/13590 for details.
185		if f.IsDir() {
186			changedDirs[path] = struct{}{}
187		}
188		if change.Kind == ChangeAdd || change.Kind == ChangeDelete {
189			parent := filepath.Dir(path)
190			if _, ok := changedDirs[parent]; !ok && parent != "/" {
191				changes = append(changes, Change{Path: parent, Kind: ChangeModify})
192				changedDirs[parent] = struct{}{}
193			}
194		}
195
196		// Record change
197		changes = append(changes, change)
198		return nil
199	})
200	if err != nil && !os.IsNotExist(err) {
201		return nil, err
202	}
203	return changes, nil
204}
205
206// FileInfo describes the information of a file.
207type FileInfo struct {
208	parent     *FileInfo
209	name       string
210	stat       *system.StatT
211	children   map[string]*FileInfo
212	capability []byte
213	added      bool
214}
215
216// LookUp looks up the file information of a file.
217func (info *FileInfo) LookUp(path string) *FileInfo {
218	// As this runs on the daemon side, file paths are OS specific.
219	parent := info
220	if path == string(os.PathSeparator) {
221		return info
222	}
223
224	pathElements := strings.Split(path, string(os.PathSeparator))
225	for _, elem := range pathElements {
226		if elem != "" {
227			child := parent.children[elem]
228			if child == nil {
229				return nil
230			}
231			parent = child
232		}
233	}
234	return parent
235}
236
237func (info *FileInfo) path() string {
238	if info.parent == nil {
239		// As this runs on the daemon side, file paths are OS specific.
240		return string(os.PathSeparator)
241	}
242	return filepath.Join(info.parent.path(), info.name)
243}
244
245func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) {
246
247	sizeAtEntry := len(*changes)
248
249	if oldInfo == nil {
250		// add
251		change := Change{
252			Path: info.path(),
253			Kind: ChangeAdd,
254		}
255		*changes = append(*changes, change)
256		info.added = true
257	}
258
259	// We make a copy so we can modify it to detect additions
260	// also, we only recurse on the old dir if the new info is a directory
261	// otherwise any previous delete/change is considered recursive
262	oldChildren := make(map[string]*FileInfo)
263	if oldInfo != nil && info.isDir() {
264		for k, v := range oldInfo.children {
265			oldChildren[k] = v
266		}
267	}
268
269	for name, newChild := range info.children {
270		oldChild := oldChildren[name]
271		if oldChild != nil {
272			// change?
273			oldStat := oldChild.stat
274			newStat := newChild.stat
275			// Note: We can't compare inode or ctime or blocksize here, because these change
276			// when copying a file into a container. However, that is not generally a problem
277			// because any content change will change mtime, and any status change should
278			// be visible when actually comparing the stat fields. The only time this
279			// breaks down is if some code intentionally hides a change by setting
280			// back mtime
281			if statDifferent(oldStat, newStat) ||
282				!bytes.Equal(oldChild.capability, newChild.capability) {
283				change := Change{
284					Path: newChild.path(),
285					Kind: ChangeModify,
286				}
287				*changes = append(*changes, change)
288				newChild.added = true
289			}
290
291			// Remove from copy so we can detect deletions
292			delete(oldChildren, name)
293		}
294
295		newChild.addChanges(oldChild, changes)
296	}
297	for _, oldChild := range oldChildren {
298		// delete
299		change := Change{
300			Path: oldChild.path(),
301			Kind: ChangeDelete,
302		}
303		*changes = append(*changes, change)
304	}
305
306	// If there were changes inside this directory, we need to add it, even if the directory
307	// itself wasn't changed. This is needed to properly save and restore filesystem permissions.
308	// As this runs on the daemon side, file paths are OS specific.
309	if len(*changes) > sizeAtEntry && info.isDir() && !info.added && info.path() != string(os.PathSeparator) {
310		change := Change{
311			Path: info.path(),
312			Kind: ChangeModify,
313		}
314		// Let's insert the directory entry before the recently added entries located inside this dir
315		*changes = append(*changes, change) // just to resize the slice, will be overwritten
316		copy((*changes)[sizeAtEntry+1:], (*changes)[sizeAtEntry:])
317		(*changes)[sizeAtEntry] = change
318	}
319
320}
321
322// Changes add changes to file information.
323func (info *FileInfo) Changes(oldInfo *FileInfo) []Change {
324	var changes []Change
325
326	info.addChanges(oldInfo, &changes)
327
328	return changes
329}
330
331func newRootFileInfo() *FileInfo {
332	// As this runs on the daemon side, file paths are OS specific.
333	root := &FileInfo{
334		name:     string(os.PathSeparator),
335		children: make(map[string]*FileInfo),
336	}
337	return root
338}
339
340// ChangesDirs compares two directories and generates an array of Change objects describing the changes.
341// If oldDir is "", then all files in newDir will be Add-Changes.
342func ChangesDirs(newDir, oldDir string) ([]Change, error) {
343	var (
344		oldRoot, newRoot *FileInfo
345	)
346	if oldDir == "" {
347		emptyDir, err := ioutil.TempDir("", "empty")
348		if err != nil {
349			return nil, err
350		}
351		defer os.Remove(emptyDir)
352		oldDir = emptyDir
353	}
354	oldRoot, newRoot, err := collectFileInfoForChanges(oldDir, newDir)
355	if err != nil {
356		return nil, err
357	}
358
359	return newRoot.Changes(oldRoot), nil
360}
361
362// ChangesSize calculates the size in bytes of the provided changes, based on newDir.
363func ChangesSize(newDir string, changes []Change) int64 {
364	var (
365		size int64
366		sf   = make(map[uint64]struct{})
367	)
368	for _, change := range changes {
369		if change.Kind == ChangeModify || change.Kind == ChangeAdd {
370			file := filepath.Join(newDir, change.Path)
371			fileInfo, err := os.Lstat(file)
372			if err != nil {
373				logrus.Errorf("Can not stat %q: %s", file, err)
374				continue
375			}
376
377			if fileInfo != nil && !fileInfo.IsDir() {
378				if hasHardlinks(fileInfo) {
379					inode := getIno(fileInfo)
380					if _, ok := sf[inode]; !ok {
381						size += fileInfo.Size()
382						sf[inode] = struct{}{}
383					}
384				} else {
385					size += fileInfo.Size()
386				}
387			}
388		}
389	}
390	return size
391}
392
393// ExportChanges produces an Archive from the provided changes, relative to dir.
394func ExportChanges(dir string, changes []Change, uidMaps, gidMaps []idtools.IDMap) (io.ReadCloser, error) {
395	reader, writer := io.Pipe()
396	go func() {
397		ta := newTarAppender(idtools.NewIDMappingsFromMaps(uidMaps, gidMaps), writer, nil)
398
399		// this buffer is needed for the duration of this piped stream
400		defer pools.BufioWriter32KPool.Put(ta.Buffer)
401
402		sort.Sort(changesByPath(changes))
403
404		// In general we log errors here but ignore them because
405		// during e.g. a diff operation the container can continue
406		// mutating the filesystem and we can see transient errors
407		// from this
408		for _, change := range changes {
409			if change.Kind == ChangeDelete {
410				whiteOutDir := filepath.Dir(change.Path)
411				whiteOutBase := filepath.Base(change.Path)
412				whiteOut := filepath.Join(whiteOutDir, WhiteoutPrefix+whiteOutBase)
413				timestamp := time.Now()
414				hdr := &tar.Header{
415					Name:       whiteOut[1:],
416					Size:       0,
417					ModTime:    timestamp,
418					AccessTime: timestamp,
419					ChangeTime: timestamp,
420				}
421				if err := ta.TarWriter.WriteHeader(hdr); err != nil {
422					logrus.Debugf("Can't write whiteout header: %s", err)
423				}
424			} else {
425				path := filepath.Join(dir, change.Path)
426				if err := ta.addTarFile(path, change.Path[1:]); err != nil {
427					logrus.Debugf("Can't add file %s to tar: %s", path, err)
428				}
429			}
430		}
431
432		// Make sure to check the error on Close.
433		if err := ta.TarWriter.Close(); err != nil {
434			logrus.Debugf("Can't close layer: %s", err)
435		}
436		if err := writer.Close(); err != nil {
437			logrus.Debugf("failed close Changes writer: %s", err)
438		}
439	}()
440	return reader, nil
441}
442