1package archive // import "github.com/docker/docker/pkg/archive" 2 3import ( 4 "archive/tar" 5 "bytes" 6 "fmt" 7 "io" 8 "io/ioutil" 9 "os" 10 "path/filepath" 11 "sort" 12 "strings" 13 "syscall" 14 "time" 15 16 "github.com/docker/docker/pkg/idtools" 17 "github.com/docker/docker/pkg/pools" 18 "github.com/docker/docker/pkg/system" 19 "github.com/sirupsen/logrus" 20) 21 22// ChangeType represents the change type. 23type ChangeType int 24 25const ( 26 // ChangeModify represents the modify operation. 27 ChangeModify = iota 28 // ChangeAdd represents the add operation. 29 ChangeAdd 30 // ChangeDelete represents the delete operation. 31 ChangeDelete 32) 33 34func (c ChangeType) String() string { 35 switch c { 36 case ChangeModify: 37 return "C" 38 case ChangeAdd: 39 return "A" 40 case ChangeDelete: 41 return "D" 42 } 43 return "" 44} 45 46// Change represents a change, it wraps the change type and path. 47// It describes changes of the files in the path respect to the 48// parent layers. The change could be modify, add, delete. 49// This is used for layer diff. 50type Change struct { 51 Path string 52 Kind ChangeType 53} 54 55func (change *Change) String() string { 56 return fmt.Sprintf("%s %s", change.Kind, change.Path) 57} 58 59// for sort.Sort 60type changesByPath []Change 61 62func (c changesByPath) Less(i, j int) bool { return c[i].Path < c[j].Path } 63func (c changesByPath) Len() int { return len(c) } 64func (c changesByPath) Swap(i, j int) { c[j], c[i] = c[i], c[j] } 65 66// Gnu tar doesn't have sub-second mtime precision. The go tar 67// writer (1.10+) does when using PAX format, but we round times to seconds 68// to ensure archives have the same hashes for backwards compatibility. 69// See https://github.com/moby/moby/pull/35739/commits/fb170206ba12752214630b269a40ac7be6115ed4. 70// 71// Non-sub-second is problematic when we apply changes via tar 72// files. We handle this by comparing for exact times, *or* same 73// second count and either a or b having exactly 0 nanoseconds 74func sameFsTime(a, b time.Time) bool { 75 return a.Equal(b) || 76 (a.Unix() == b.Unix() && 77 (a.Nanosecond() == 0 || b.Nanosecond() == 0)) 78} 79 80func sameFsTimeSpec(a, b syscall.Timespec) bool { 81 return a.Sec == b.Sec && 82 (a.Nsec == b.Nsec || a.Nsec == 0 || b.Nsec == 0) 83} 84 85// Changes walks the path rw and determines changes for the files in the path, 86// with respect to the parent layers 87func Changes(layers []string, rw string) ([]Change, error) { 88 return changes(layers, rw, aufsDeletedFile, aufsMetadataSkip) 89} 90 91func aufsMetadataSkip(path string) (skip bool, err error) { 92 skip, err = filepath.Match(string(os.PathSeparator)+WhiteoutMetaPrefix+"*", path) 93 if err != nil { 94 skip = true 95 } 96 return 97} 98 99func aufsDeletedFile(root, path string, fi os.FileInfo) (string, error) { 100 f := filepath.Base(path) 101 102 // If there is a whiteout, then the file was removed 103 if strings.HasPrefix(f, WhiteoutPrefix) { 104 originalFile := f[len(WhiteoutPrefix):] 105 return filepath.Join(filepath.Dir(path), originalFile), nil 106 } 107 108 return "", nil 109} 110 111type skipChange func(string) (bool, error) 112type deleteChange func(string, string, os.FileInfo) (string, error) 113 114func changes(layers []string, rw string, dc deleteChange, sc skipChange) ([]Change, error) { 115 var ( 116 changes []Change 117 changedDirs = make(map[string]struct{}) 118 ) 119 120 err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error { 121 if err != nil { 122 return err 123 } 124 125 // Rebase path 126 path, err = filepath.Rel(rw, path) 127 if err != nil { 128 return err 129 } 130 131 // As this runs on the daemon side, file paths are OS specific. 132 path = filepath.Join(string(os.PathSeparator), path) 133 134 // Skip root 135 if path == string(os.PathSeparator) { 136 return nil 137 } 138 139 if sc != nil { 140 if skip, err := sc(path); skip { 141 return err 142 } 143 } 144 145 change := Change{ 146 Path: path, 147 } 148 149 deletedFile, err := dc(rw, path, f) 150 if err != nil { 151 return err 152 } 153 154 // Find out what kind of modification happened 155 if deletedFile != "" { 156 change.Path = deletedFile 157 change.Kind = ChangeDelete 158 } else { 159 // Otherwise, the file was added 160 change.Kind = ChangeAdd 161 162 // ...Unless it already existed in a top layer, in which case, it's a modification 163 for _, layer := range layers { 164 stat, err := os.Stat(filepath.Join(layer, path)) 165 if err != nil && !os.IsNotExist(err) { 166 return err 167 } 168 if err == nil { 169 // The file existed in the top layer, so that's a modification 170 171 // However, if it's a directory, maybe it wasn't actually modified. 172 // If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar 173 if stat.IsDir() && f.IsDir() { 174 if f.Size() == stat.Size() && f.Mode() == stat.Mode() && sameFsTime(f.ModTime(), stat.ModTime()) { 175 // Both directories are the same, don't record the change 176 return nil 177 } 178 } 179 change.Kind = ChangeModify 180 break 181 } 182 } 183 } 184 185 // If /foo/bar/file.txt is modified, then /foo/bar must be part of the changed files. 186 // This block is here to ensure the change is recorded even if the 187 // modify time, mode and size of the parent directory in the rw and ro layers are all equal. 188 // Check https://github.com/docker/docker/pull/13590 for details. 189 if f.IsDir() { 190 changedDirs[path] = struct{}{} 191 } 192 if change.Kind == ChangeAdd || change.Kind == ChangeDelete { 193 parent := filepath.Dir(path) 194 if _, ok := changedDirs[parent]; !ok && parent != "/" { 195 changes = append(changes, Change{Path: parent, Kind: ChangeModify}) 196 changedDirs[parent] = struct{}{} 197 } 198 } 199 200 // Record change 201 changes = append(changes, change) 202 return nil 203 }) 204 if err != nil && !os.IsNotExist(err) { 205 return nil, err 206 } 207 return changes, nil 208} 209 210// FileInfo describes the information of a file. 211type FileInfo struct { 212 parent *FileInfo 213 name string 214 stat *system.StatT 215 children map[string]*FileInfo 216 capability []byte 217 added bool 218} 219 220// LookUp looks up the file information of a file. 221func (info *FileInfo) LookUp(path string) *FileInfo { 222 // As this runs on the daemon side, file paths are OS specific. 223 parent := info 224 if path == string(os.PathSeparator) { 225 return info 226 } 227 228 pathElements := strings.Split(path, string(os.PathSeparator)) 229 for _, elem := range pathElements { 230 if elem != "" { 231 child := parent.children[elem] 232 if child == nil { 233 return nil 234 } 235 parent = child 236 } 237 } 238 return parent 239} 240 241func (info *FileInfo) path() string { 242 if info.parent == nil { 243 // As this runs on the daemon side, file paths are OS specific. 244 return string(os.PathSeparator) 245 } 246 return filepath.Join(info.parent.path(), info.name) 247} 248 249func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) { 250 251 sizeAtEntry := len(*changes) 252 253 if oldInfo == nil { 254 // add 255 change := Change{ 256 Path: info.path(), 257 Kind: ChangeAdd, 258 } 259 *changes = append(*changes, change) 260 info.added = true 261 } 262 263 // We make a copy so we can modify it to detect additions 264 // also, we only recurse on the old dir if the new info is a directory 265 // otherwise any previous delete/change is considered recursive 266 oldChildren := make(map[string]*FileInfo) 267 if oldInfo != nil && info.isDir() { 268 for k, v := range oldInfo.children { 269 oldChildren[k] = v 270 } 271 } 272 273 for name, newChild := range info.children { 274 oldChild := oldChildren[name] 275 if oldChild != nil { 276 // change? 277 oldStat := oldChild.stat 278 newStat := newChild.stat 279 // Note: We can't compare inode or ctime or blocksize here, because these change 280 // when copying a file into a container. However, that is not generally a problem 281 // because any content change will change mtime, and any status change should 282 // be visible when actually comparing the stat fields. The only time this 283 // breaks down is if some code intentionally hides a change by setting 284 // back mtime 285 if statDifferent(oldStat, newStat) || 286 !bytes.Equal(oldChild.capability, newChild.capability) { 287 change := Change{ 288 Path: newChild.path(), 289 Kind: ChangeModify, 290 } 291 *changes = append(*changes, change) 292 newChild.added = true 293 } 294 295 // Remove from copy so we can detect deletions 296 delete(oldChildren, name) 297 } 298 299 newChild.addChanges(oldChild, changes) 300 } 301 for _, oldChild := range oldChildren { 302 // delete 303 change := Change{ 304 Path: oldChild.path(), 305 Kind: ChangeDelete, 306 } 307 *changes = append(*changes, change) 308 } 309 310 // If there were changes inside this directory, we need to add it, even if the directory 311 // itself wasn't changed. This is needed to properly save and restore filesystem permissions. 312 // As this runs on the daemon side, file paths are OS specific. 313 if len(*changes) > sizeAtEntry && info.isDir() && !info.added && info.path() != string(os.PathSeparator) { 314 change := Change{ 315 Path: info.path(), 316 Kind: ChangeModify, 317 } 318 // Let's insert the directory entry before the recently added entries located inside this dir 319 *changes = append(*changes, change) // just to resize the slice, will be overwritten 320 copy((*changes)[sizeAtEntry+1:], (*changes)[sizeAtEntry:]) 321 (*changes)[sizeAtEntry] = change 322 } 323 324} 325 326// Changes add changes to file information. 327func (info *FileInfo) Changes(oldInfo *FileInfo) []Change { 328 var changes []Change 329 330 info.addChanges(oldInfo, &changes) 331 332 return changes 333} 334 335func newRootFileInfo() *FileInfo { 336 // As this runs on the daemon side, file paths are OS specific. 337 root := &FileInfo{ 338 name: string(os.PathSeparator), 339 children: make(map[string]*FileInfo), 340 } 341 return root 342} 343 344// ChangesDirs compares two directories and generates an array of Change objects describing the changes. 345// If oldDir is "", then all files in newDir will be Add-Changes. 346func ChangesDirs(newDir, oldDir string) ([]Change, error) { 347 var ( 348 oldRoot, newRoot *FileInfo 349 ) 350 if oldDir == "" { 351 emptyDir, err := ioutil.TempDir("", "empty") 352 if err != nil { 353 return nil, err 354 } 355 defer os.Remove(emptyDir) 356 oldDir = emptyDir 357 } 358 oldRoot, newRoot, err := collectFileInfoForChanges(oldDir, newDir) 359 if err != nil { 360 return nil, err 361 } 362 363 return newRoot.Changes(oldRoot), nil 364} 365 366// ChangesSize calculates the size in bytes of the provided changes, based on newDir. 367func ChangesSize(newDir string, changes []Change) int64 { 368 var ( 369 size int64 370 sf = make(map[uint64]struct{}) 371 ) 372 for _, change := range changes { 373 if change.Kind == ChangeModify || change.Kind == ChangeAdd { 374 file := filepath.Join(newDir, change.Path) 375 fileInfo, err := os.Lstat(file) 376 if err != nil { 377 logrus.Errorf("Can not stat %q: %s", file, err) 378 continue 379 } 380 381 if fileInfo != nil && !fileInfo.IsDir() { 382 if hasHardlinks(fileInfo) { 383 inode := getIno(fileInfo) 384 if _, ok := sf[inode]; !ok { 385 size += fileInfo.Size() 386 sf[inode] = struct{}{} 387 } 388 } else { 389 size += fileInfo.Size() 390 } 391 } 392 } 393 } 394 return size 395} 396 397// ExportChanges produces an Archive from the provided changes, relative to dir. 398func ExportChanges(dir string, changes []Change, uidMaps, gidMaps []idtools.IDMap) (io.ReadCloser, error) { 399 reader, writer := io.Pipe() 400 go func() { 401 ta := newTarAppender(idtools.NewIDMappingsFromMaps(uidMaps, gidMaps), writer, nil) 402 403 // this buffer is needed for the duration of this piped stream 404 defer pools.BufioWriter32KPool.Put(ta.Buffer) 405 406 sort.Sort(changesByPath(changes)) 407 408 // In general we log errors here but ignore them because 409 // during e.g. a diff operation the container can continue 410 // mutating the filesystem and we can see transient errors 411 // from this 412 for _, change := range changes { 413 if change.Kind == ChangeDelete { 414 whiteOutDir := filepath.Dir(change.Path) 415 whiteOutBase := filepath.Base(change.Path) 416 whiteOut := filepath.Join(whiteOutDir, WhiteoutPrefix+whiteOutBase) 417 timestamp := time.Now() 418 hdr := &tar.Header{ 419 Name: whiteOut[1:], 420 Size: 0, 421 ModTime: timestamp, 422 AccessTime: timestamp, 423 ChangeTime: timestamp, 424 } 425 if err := ta.TarWriter.WriteHeader(hdr); err != nil { 426 logrus.Debugf("Can't write whiteout header: %s", err) 427 } 428 } else { 429 path := filepath.Join(dir, change.Path) 430 if err := ta.addTarFile(path, change.Path[1:]); err != nil { 431 logrus.Debugf("Can't add file %s to tar: %s", path, err) 432 } 433 } 434 } 435 436 // Make sure to check the error on Close. 437 if err := ta.TarWriter.Close(); err != nil { 438 logrus.Debugf("Can't close layer: %s", err) 439 } 440 if err := writer.Close(); err != nil { 441 logrus.Debugf("failed close Changes writer: %s", err) 442 } 443 }() 444 return reader, nil 445} 446