1// +build linux
2
3package devmapper // import "github.com/docker/docker/daemon/graphdriver/devmapper"
4
5import (
6	"bufio"
7	"encoding/json"
8	"fmt"
9	"io"
10	"io/ioutil"
11	"os"
12	"os/exec"
13	"path"
14	"path/filepath"
15	"reflect"
16	"strconv"
17	"strings"
18	"sync"
19	"time"
20
21	"github.com/docker/docker/daemon/graphdriver"
22	"github.com/docker/docker/dockerversion"
23	"github.com/docker/docker/pkg/devicemapper"
24	"github.com/docker/docker/pkg/dmesg"
25	"github.com/docker/docker/pkg/idtools"
26	"github.com/docker/docker/pkg/loopback"
27	"github.com/docker/docker/pkg/mount"
28	"github.com/docker/docker/pkg/parsers"
29	"github.com/docker/docker/pkg/parsers/kernel"
30	"github.com/docker/go-units"
31	"github.com/opencontainers/selinux/go-selinux/label"
32	"github.com/pkg/errors"
33	"github.com/sirupsen/logrus"
34	"golang.org/x/sys/unix"
35)
36
// Package-level defaults and feature switches.
//
// The three size defaults evaluate to 100 GiB (data loopback), 2 GiB
// (metadata loopback) and 10 GiB (base filesystem). maxDeviceID is the
// largest device ID the bitmap helpers accept, and deviceIDMapSz is the
// byte length of a bitmap holding one bit for every ID in [0, maxDeviceID].
// NOTE(review): the boolean switches are only declared here; where they are
// set and consumed is outside this part of the file.
var (
	defaultDataLoopbackSize      int64  = 100 * 1024 * 1024 * 1024
	defaultMetaDataLoopbackSize  int64  = 2 * 1024 * 1024 * 1024
	defaultBaseFsSize            uint64 = 10 * 1024 * 1024 * 1024
	defaultThinpBlockSize        uint32 = 128 // 64K = 128 512b sectors
	defaultUdevSyncOverride             = false
	maxDeviceID                         = 0xffffff // 24 bit, pool limit
	deviceIDMapSz                       = (maxDeviceID + 1) / 8
	driverDeferredRemovalSupport        = false
	enableDeferredRemoval               = false
	enableDeferredDeletion              = false
	userBaseSize                        = false
	defaultMinFreeSpacePercent   uint32 = 10
	lvmSetupConfigForce          bool
)
52
// Names of the bookkeeping files kept inside the metadata directory next to
// the per-device files. The start-up scan skips both of them because they do
// not describe a device.
const deviceSetMetaFile = "deviceset-metadata"
const transactionMetaFile = "transaction-metadata"
55
// transaction records the pool operation that is currently in flight. It is
// embedded in DeviceSet, so its fields are addressed as
// devices.OpenTransactionID and so on.
//
// OpenTransactionID is set to TransactionID+1 by allocateTransactionID and
// becomes the pool's TransactionID once updatePoolTransactionID succeeds.
// NOTE(review): DeviceIDHash and DeviceID presumably name the device the
// open transaction is creating; confirm against openTransaction.
type transaction struct {
	OpenTransactionID uint64 `json:"open_transaction_id"`
	DeviceIDHash      string `json:"device_hash"`
	DeviceID          int    `json:"device_id"`
}
61
// devInfo is the per-device record. It is serialized to JSON by saveMetadata
// into one file per device under the metadata directory.
//
// Hash is not serialized (json:"-"); it is recovered from the metadata file
// name when the record is loaded. The empty hash denotes the base device,
// whose file and device-name suffix are both "base". devices points back to
// the owning DeviceSet and is likewise not serialized.
type devInfo struct {
	Hash          string `json:"-"`
	DeviceID      int    `json:"device_id"`
	Size          uint64 `json:"size"`
	TransactionID uint64 `json:"transaction_id"`
	Initialized   bool   `json:"initialized"`
	Deleted       bool   `json:"deleted"`
	devices       *DeviceSet

	// The global DeviceSet lock guarantees that we serialize all
	// the calls to libdevmapper (which is not threadsafe), but we
	// sometimes release that lock while sleeping. In that case
	// this per-device lock is still held, protecting against
	// other accesses to the device that we're doing the wait on.
	//
	// WARNING: In order to avoid AB-BA deadlocks when releasing
	// the global lock while holding the per-device locks, all
	// device locks must be acquired *before* the global lock, and
	// multiple device locks should be acquired parent before child.
	lock sync.Mutex
}
83
// metaData is the in-memory table of known devices, keyed by device hash
// (the base device uses the empty string). It is embedded in DeviceSet and
// is also the shape of the legacy single-file metadata that
// migrateOldMetaData decodes.
type metaData struct {
	Devices map[string]*devInfo `json:"Devices"`
}
87
// DeviceSet holds the state of one thin pool and the devices created in it.
//
// Notable fields, as used by the methods in this file:
//   - root: directory under which the "devicemapper" (loopback files) and
//     "metadata" (per-device JSON files) directories live.
//   - devicePrefix: prefix of every device-mapper name built here
//     ("<prefix>-<hash>", "<prefix>-base", "<prefix>-pool").
//   - TransactionID / embedded transaction: the pool's committed transaction
//     ID and the one currently open.
//   - NextDeviceID: cursor for the device-ID search; it wraps at maxDeviceID.
//   - deviceIDMap: bitmap with one bit per device ID; a set bit means the ID
//     is in use.
//   - thinPoolDevice: when non-empty it is used as the pool name instead of
//     "<prefix>-pool".
//   - minFreeSpacePercent: 0 disables the free-space check in
//     poolHasFreeSpace.
//   - deletionWorkerTicker: drives the deferred-deletion cleanup worker.
type DeviceSet struct {
	metaData      `json:"-"`
	sync.Mutex    `json:"-"` // Protects all fields of DeviceSet and serializes calls into libdevmapper
	root          string
	devicePrefix  string
	TransactionID uint64 `json:"-"`
	NextDeviceID  int    `json:"next_device_id"`
	deviceIDMap   []byte

	// Options
	dataLoopbackSize      int64
	metaDataLoopbackSize  int64
	baseFsSize            uint64
	filesystem            string
	mountOptions          string
	mkfsArgs              []string
	dataDevice            string // block or loop dev
	dataLoopFile          string // loopback file, if used
	metadataDevice        string // block or loop dev
	metadataLoopFile      string // loopback file, if used
	doBlkDiscard          bool
	thinpBlockSize        uint32
	thinPoolDevice        string
	transaction           `json:"-"`
	overrideUdevSyncCheck bool
	deferredRemove        bool   // use deferred removal
	deferredDelete        bool   // use deferred deletion
	BaseDeviceUUID        string // save UUID of base device
	BaseDeviceFilesystem  string // save filesystem of base device
	nrDeletedDevices      uint   // number of deleted devices
	deletionWorkerTicker  *time.Ticker
	uidMaps               []idtools.IDMap
	gidMaps               []idtools.IDMap
	minFreeSpacePercent   uint32 //min free space percentage in thinpool
	xfsNospaceRetries     string // max retries when xfs receives ENOSPC
	lvmSetupConfig        directLVMConfig
}
126
// DiskUsage contains information about disk usage and is used when reporting
// the Status of a device set. All three values are byte counts.
type DiskUsage struct {
	// Used is the number of bytes in use on the disk.
	Used uint64
	// Total is the total number of bytes on the disk.
	Total uint64
	// Available is the number of bytes still available on the disk.
	Available uint64
}
136
// Status holds the information reported about the device set and its thin pool.
type Status struct {
	// PoolName is the name of the data pool.
	PoolName string
	// DataFile is the actual block device for data.
	DataFile string
	// DataLoopback is the loopback file for data, if used.
	DataLoopback string
	// MetadataFile is the actual block device for metadata.
	MetadataFile string
	// MetadataLoopback is the loopback file for metadata, if used.
	MetadataLoopback string
	// Data is the disk usage of the data device.
	Data DiskUsage
	// Metadata is the disk usage of the metadata device.
	Metadata DiskUsage
	// BaseDeviceSize is the base size of containers and images.
	BaseDeviceSize uint64
	// BaseDeviceFS is the backing filesystem.
	BaseDeviceFS string
	// SectorSize is the size of a sector.
	SectorSize uint64
	// UdevSyncSupported is true if udev sync is supported.
	UdevSyncSupported bool
	// DeferredRemoveEnabled is true if deferred removal is enabled.
	DeferredRemoveEnabled bool
	// DeferredDeleteEnabled is true if deferred deletion is enabled. This is
	// different from deferred removal. "removal" means that device mapper
	// device is deactivated. Thin device is still in thin pool and can be
	// activated again. But "deletion" means that thin device will be deleted
	// from thin pool and it can't be activated again.
	//
	// DeferredDeletedDeviceCount is the number of devices currently marked
	// as deleted. NOTE(review): the unit of MinFreeSpace is not visible in
	// this part of the file; confirm where Status is populated.
	DeferredDeleteEnabled      bool
	DeferredDeletedDeviceCount uint
	MinFreeSpace               uint64
}
172
// deviceMetadata is the structure used to export image/container metadata in
// docker inspect.
type deviceMetadata struct {
	deviceID   int
	deviceSize uint64 // size in bytes
	deviceName string // Device name as used during activation
}
179
// DevStatus holds information about a mounted device: its ID, size and
// sector usage.
type DevStatus struct {
	// DeviceID is the id of the device.
	DeviceID int
	// Size is the size of the filesystem.
	Size uint64
	// TransactionID is a unique integer per device set used to identify an
	// operation on the file system; this number is incremental.
	TransactionID uint64
	// SizeInSectors indicates the size of the device in sectors.
	SizeInSectors uint64
	// MappedSectors indicates the number of mapped sectors.
	MappedSectors uint64
	// HighestMappedSector is the highest mapped sector.
	HighestMappedSector uint64
}
195
// getDevName returns the path of the device node for the given
// device-mapper name, i.e. the name prefixed with "/dev/mapper/".
func getDevName(name string) string {
	const mapperDir = "/dev/mapper/"
	return mapperDir + name
}
199
200func (info *devInfo) Name() string {
201	hash := info.Hash
202	if hash == "" {
203		hash = "base"
204	}
205	return fmt.Sprintf("%s-%s", info.devices.devicePrefix, hash)
206}
207
208func (info *devInfo) DevName() string {
209	return getDevName(info.Name())
210}
211
212func (devices *DeviceSet) loopbackDir() string {
213	return path.Join(devices.root, "devicemapper")
214}
215
216func (devices *DeviceSet) metadataDir() string {
217	return path.Join(devices.root, "metadata")
218}
219
220func (devices *DeviceSet) metadataFile(info *devInfo) string {
221	file := info.Hash
222	if file == "" {
223		file = "base"
224	}
225	return path.Join(devices.metadataDir(), file)
226}
227
228func (devices *DeviceSet) transactionMetaFile() string {
229	return path.Join(devices.metadataDir(), transactionMetaFile)
230}
231
232func (devices *DeviceSet) deviceSetMetaFile() string {
233	return path.Join(devices.metadataDir(), deviceSetMetaFile)
234}
235
236func (devices *DeviceSet) oldMetadataFile() string {
237	return path.Join(devices.loopbackDir(), "json")
238}
239
240func (devices *DeviceSet) getPoolName() string {
241	if devices.thinPoolDevice == "" {
242		return devices.devicePrefix + "-pool"
243	}
244	return devices.thinPoolDevice
245}
246
247func (devices *DeviceSet) getPoolDevName() string {
248	return getDevName(devices.getPoolName())
249}
250
251func (devices *DeviceSet) hasImage(name string) bool {
252	dirname := devices.loopbackDir()
253	filename := path.Join(dirname, name)
254
255	_, err := os.Stat(filename)
256	return err == nil
257}
258
259// ensureImage creates a sparse file of <size> bytes at the path
260// <root>/devicemapper/<name>.
261// If the file already exists and new size is larger than its current size, it grows to the new size.
262// Either way it returns the full path.
263func (devices *DeviceSet) ensureImage(name string, size int64) (string, error) {
264	dirname := devices.loopbackDir()
265	filename := path.Join(dirname, name)
266
267	uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps)
268	if err != nil {
269		return "", err
270	}
271	if err := idtools.MkdirAllAndChown(dirname, 0700, idtools.Identity{UID: uid, GID: gid}); err != nil {
272		return "", err
273	}
274
275	if fi, err := os.Stat(filename); err != nil {
276		if !os.IsNotExist(err) {
277			return "", err
278		}
279		logrus.WithField("storage-driver", "devicemapper").Debugf("Creating loopback file %s for device-manage use", filename)
280		file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0600)
281		if err != nil {
282			return "", err
283		}
284		defer file.Close()
285
286		if err := file.Truncate(size); err != nil {
287			return "", err
288		}
289	} else {
290		if fi.Size() < size {
291			file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0600)
292			if err != nil {
293				return "", err
294			}
295			defer file.Close()
296			if err := file.Truncate(size); err != nil {
297				return "", fmt.Errorf("devmapper: Unable to grow loopback file %s: %v", filename, err)
298			}
299		} else if fi.Size() > size {
300			logrus.WithField("storage-driver", "devicemapper").Warnf("Can't shrink loopback file %s", filename)
301		}
302	}
303	return filename, nil
304}
305
306func (devices *DeviceSet) allocateTransactionID() uint64 {
307	devices.OpenTransactionID = devices.TransactionID + 1
308	return devices.OpenTransactionID
309}
310
311func (devices *DeviceSet) updatePoolTransactionID() error {
312	if err := devicemapper.SetTransactionID(devices.getPoolDevName(), devices.TransactionID, devices.OpenTransactionID); err != nil {
313		return fmt.Errorf("devmapper: Error setting devmapper transaction ID: %s", err)
314	}
315	devices.TransactionID = devices.OpenTransactionID
316	return nil
317}
318
319func (devices *DeviceSet) removeMetadata(info *devInfo) error {
320	if err := os.RemoveAll(devices.metadataFile(info)); err != nil {
321		return fmt.Errorf("devmapper: Error removing metadata file %s: %s", devices.metadataFile(info), err)
322	}
323	return nil
324}
325
326// Given json data and file path, write it to disk
327func (devices *DeviceSet) writeMetaFile(jsonData []byte, filePath string) error {
328	tmpFile, err := ioutil.TempFile(devices.metadataDir(), ".tmp")
329	if err != nil {
330		return fmt.Errorf("devmapper: Error creating metadata file: %s", err)
331	}
332
333	n, err := tmpFile.Write(jsonData)
334	if err != nil {
335		return fmt.Errorf("devmapper: Error writing metadata to %s: %s", tmpFile.Name(), err)
336	}
337	if n < len(jsonData) {
338		return io.ErrShortWrite
339	}
340	if err := tmpFile.Sync(); err != nil {
341		return fmt.Errorf("devmapper: Error syncing metadata file %s: %s", tmpFile.Name(), err)
342	}
343	if err := tmpFile.Close(); err != nil {
344		return fmt.Errorf("devmapper: Error closing metadata file %s: %s", tmpFile.Name(), err)
345	}
346	if err := os.Rename(tmpFile.Name(), filePath); err != nil {
347		return fmt.Errorf("devmapper: Error committing metadata file %s: %s", tmpFile.Name(), err)
348	}
349
350	return nil
351}
352
353func (devices *DeviceSet) saveMetadata(info *devInfo) error {
354	jsonData, err := json.Marshal(info)
355	if err != nil {
356		return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err)
357	}
358	return devices.writeMetaFile(jsonData, devices.metadataFile(info))
359}
360
361func (devices *DeviceSet) markDeviceIDUsed(deviceID int) {
362	var mask byte
363	i := deviceID % 8
364	mask = 1 << uint(i)
365	devices.deviceIDMap[deviceID/8] = devices.deviceIDMap[deviceID/8] | mask
366}
367
368func (devices *DeviceSet) markDeviceIDFree(deviceID int) {
369	var mask byte
370	i := deviceID % 8
371	mask = ^(1 << uint(i))
372	devices.deviceIDMap[deviceID/8] = devices.deviceIDMap[deviceID/8] & mask
373}
374
375func (devices *DeviceSet) isDeviceIDFree(deviceID int) bool {
376	var mask byte
377	i := deviceID % 8
378	mask = (1 << uint(i))
379	return (devices.deviceIDMap[deviceID/8] & mask) == 0
380}
381
382// Should be called with devices.Lock() held.
383func (devices *DeviceSet) lookupDevice(hash string) (*devInfo, error) {
384	info := devices.Devices[hash]
385	if info == nil {
386		info = devices.loadMetadata(hash)
387		if info == nil {
388			return nil, fmt.Errorf("devmapper: Unknown device %s", hash)
389		}
390
391		devices.Devices[hash] = info
392	}
393	return info, nil
394}
395
396func (devices *DeviceSet) lookupDeviceWithLock(hash string) (*devInfo, error) {
397	devices.Lock()
398	defer devices.Unlock()
399	info, err := devices.lookupDevice(hash)
400	return info, err
401}
402
403// This function relies on that device hash map has been loaded in advance.
404// Should be called with devices.Lock() held.
405func (devices *DeviceSet) constructDeviceIDMap() {
406	logrus.WithField("storage-driver", "devicemapper").Debug("constructDeviceIDMap()")
407	defer logrus.WithField("storage-driver", "devicemapper").Debug("constructDeviceIDMap() END")
408
409	for _, info := range devices.Devices {
410		devices.markDeviceIDUsed(info.DeviceID)
411		logrus.WithField("storage-driver", "devicemapper").Debugf("Added deviceId=%d to DeviceIdMap", info.DeviceID)
412	}
413}
414
415func (devices *DeviceSet) deviceFileWalkFunction(path string, finfo os.FileInfo) error {
416	logger := logrus.WithField("storage-driver", "devicemapper")
417
418	// Skip some of the meta files which are not device files.
419	if strings.HasSuffix(finfo.Name(), ".migrated") {
420		logger.Debugf("Skipping file %s", path)
421		return nil
422	}
423
424	if strings.HasPrefix(finfo.Name(), ".") {
425		logger.Debugf("Skipping file %s", path)
426		return nil
427	}
428
429	if finfo.Name() == deviceSetMetaFile {
430		logger.Debugf("Skipping file %s", path)
431		return nil
432	}
433
434	if finfo.Name() == transactionMetaFile {
435		logger.Debugf("Skipping file %s", path)
436		return nil
437	}
438
439	logger.Debugf("Loading data for file %s", path)
440
441	hash := finfo.Name()
442	if hash == "base" {
443		hash = ""
444	}
445
446	// Include deleted devices also as cleanup delete device logic
447	// will go through it and see if there are any deleted devices.
448	if _, err := devices.lookupDevice(hash); err != nil {
449		return fmt.Errorf("devmapper: Error looking up device %s:%v", hash, err)
450	}
451
452	return nil
453}
454
455func (devices *DeviceSet) loadDeviceFilesOnStart() error {
456	logrus.WithField("storage-driver", "devicemapper").Debug("loadDeviceFilesOnStart()")
457	defer logrus.WithField("storage-driver", "devicemapper").Debug("loadDeviceFilesOnStart() END")
458
459	var scan = func(path string, info os.FileInfo, err error) error {
460		if err != nil {
461			logrus.WithField("storage-driver", "devicemapper").Debugf("Can't walk the file %s", path)
462			return nil
463		}
464
465		// Skip any directories
466		if info.IsDir() {
467			return nil
468		}
469
470		return devices.deviceFileWalkFunction(path, info)
471	}
472
473	return filepath.Walk(devices.metadataDir(), scan)
474}
475
476// Should be called with devices.Lock() held.
477func (devices *DeviceSet) unregisterDevice(hash string) error {
478	logrus.WithField("storage-driver", "devicemapper").Debugf("unregisterDevice(%v)", hash)
479	info := &devInfo{
480		Hash: hash,
481	}
482
483	delete(devices.Devices, hash)
484
485	if err := devices.removeMetadata(info); err != nil {
486		logrus.WithField("storage-driver", "devicemapper").Debugf("Error removing metadata: %s", err)
487		return err
488	}
489
490	return nil
491}
492
493// Should be called with devices.Lock() held.
494func (devices *DeviceSet) registerDevice(id int, hash string, size uint64, transactionID uint64) (*devInfo, error) {
495	logrus.WithField("storage-driver", "devicemapper").Debugf("registerDevice(%v, %v)", id, hash)
496	info := &devInfo{
497		Hash:          hash,
498		DeviceID:      id,
499		Size:          size,
500		TransactionID: transactionID,
501		Initialized:   false,
502		devices:       devices,
503	}
504
505	devices.Devices[hash] = info
506
507	if err := devices.saveMetadata(info); err != nil {
508		// Try to remove unused device
509		delete(devices.Devices, hash)
510		return nil, err
511	}
512
513	return info, nil
514}
515
516func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo, ignoreDeleted bool) error {
517	logrus.WithField("storage-driver", "devicemapper").Debugf("activateDeviceIfNeeded(%v)", info.Hash)
518
519	if info.Deleted && !ignoreDeleted {
520		return fmt.Errorf("devmapper: Can't activate device %v as it is marked for deletion", info.Hash)
521	}
522
523	// Make sure deferred removal on device is canceled, if one was
524	// scheduled.
525	if err := devices.cancelDeferredRemovalIfNeeded(info); err != nil {
526		return fmt.Errorf("devmapper: Device Deferred Removal Cancellation Failed: %s", err)
527	}
528
529	if devinfo, _ := devicemapper.GetInfo(info.Name()); devinfo != nil && devinfo.Exists != 0 {
530		return nil
531	}
532
533	return devicemapper.ActivateDevice(devices.getPoolDevName(), info.Name(), info.DeviceID, info.Size)
534}
535
536// xfsSupported checks if xfs is supported, returns nil if it is, otherwise an error
537func xfsSupported() error {
538	// Make sure mkfs.xfs is available
539	if _, err := exec.LookPath("mkfs.xfs"); err != nil {
540		return err // error text is descriptive enough
541	}
542
543	// Check if kernel supports xfs filesystem or not.
544	exec.Command("modprobe", "xfs").Run()
545
546	f, err := os.Open("/proc/filesystems")
547	if err != nil {
548		return errors.Wrapf(err, "error checking for xfs support")
549	}
550	defer f.Close()
551
552	s := bufio.NewScanner(f)
553	for s.Scan() {
554		if strings.HasSuffix(s.Text(), "\txfs") {
555			return nil
556		}
557	}
558
559	if err := s.Err(); err != nil {
560		return errors.Wrapf(err, "error checking for xfs support")
561	}
562
563	return errors.New(`kernel does not support xfs, or "modprobe xfs" failed`)
564}
565
566func determineDefaultFS() string {
567	err := xfsSupported()
568	if err == nil {
569		return "xfs"
570	}
571
572	logrus.WithField("storage-driver", "devicemapper").Warnf("XFS is not supported in your system (%v). Defaulting to ext4 filesystem", err)
573	return "ext4"
574}
575
576// mkfsOptions tries to figure out whether some additional mkfs options are required
577func mkfsOptions(fs string) []string {
578	if fs == "xfs" && !kernel.CheckKernelVersion(3, 16, 0) {
579		// For kernels earlier than 3.16 (and newer xfsutils),
580		// some xfs features need to be explicitly disabled.
581		return []string{"-m", "crc=0,finobt=0"}
582	}
583
584	return []string{}
585}
586
587func (devices *DeviceSet) createFilesystem(info *devInfo) (err error) {
588	devname := info.DevName()
589
590	if devices.filesystem == "" {
591		devices.filesystem = determineDefaultFS()
592	}
593	if err := devices.saveBaseDeviceFilesystem(devices.filesystem); err != nil {
594		return err
595	}
596
597	args := mkfsOptions(devices.filesystem)
598	args = append(args, devices.mkfsArgs...)
599	args = append(args, devname)
600
601	logrus.WithField("storage-driver", "devicemapper").Infof("Creating filesystem %s on device %s, mkfs args: %v", devices.filesystem, info.Name(), args)
602	defer func() {
603		if err != nil {
604			logrus.WithField("storage-driver", "devicemapper").Infof("Error while creating filesystem %s on device %s: %v", devices.filesystem, info.Name(), err)
605		} else {
606			logrus.WithField("storage-driver", "devicemapper").Infof("Successfully created filesystem %s on device %s", devices.filesystem, info.Name())
607		}
608	}()
609
610	switch devices.filesystem {
611	case "xfs":
612		err = exec.Command("mkfs.xfs", args...).Run()
613	case "ext4":
614		err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0,lazy_journal_init=0"}, args...)...).Run()
615		if err != nil {
616			err = exec.Command("mkfs.ext4", append([]string{"-E", "nodiscard,lazy_itable_init=0"}, args...)...).Run()
617		}
618		if err != nil {
619			return err
620		}
621		err = exec.Command("tune2fs", append([]string{"-c", "-1", "-i", "0"}, devname)...).Run()
622	default:
623		err = fmt.Errorf("devmapper: Unsupported filesystem type %s", devices.filesystem)
624	}
625	return
626}
627
628func (devices *DeviceSet) migrateOldMetaData() error {
629	// Migrate old metadata file
630	jsonData, err := ioutil.ReadFile(devices.oldMetadataFile())
631	if err != nil && !os.IsNotExist(err) {
632		return err
633	}
634
635	if jsonData != nil {
636		m := metaData{Devices: make(map[string]*devInfo)}
637
638		if err := json.Unmarshal(jsonData, &m); err != nil {
639			return err
640		}
641
642		for hash, info := range m.Devices {
643			info.Hash = hash
644			devices.saveMetadata(info)
645		}
646		if err := os.Rename(devices.oldMetadataFile(), devices.oldMetadataFile()+".migrated"); err != nil {
647			return err
648		}
649
650	}
651
652	return nil
653}
654
655// Cleanup deleted devices. It assumes that all the devices have been
656// loaded in the hash table.
657func (devices *DeviceSet) cleanupDeletedDevices() error {
658	devices.Lock()
659
660	// If there are no deleted devices, there is nothing to do.
661	if devices.nrDeletedDevices == 0 {
662		devices.Unlock()
663		return nil
664	}
665
666	var deletedDevices []*devInfo
667
668	for _, info := range devices.Devices {
669		if !info.Deleted {
670			continue
671		}
672		logrus.WithField("storage-driver", "devicemapper").Debugf("Found deleted device %s.", info.Hash)
673		deletedDevices = append(deletedDevices, info)
674	}
675
676	// Delete the deleted devices. DeleteDevice() first takes the info lock
677	// and then devices.Lock(). So drop it to avoid deadlock.
678	devices.Unlock()
679
680	for _, info := range deletedDevices {
681		// This will again try deferred deletion.
682		if err := devices.DeleteDevice(info.Hash, false); err != nil {
683			logrus.WithField("storage-driver", "devicemapper").Warnf("Deletion of device %s, device_id=%v failed:%v", info.Hash, info.DeviceID, err)
684		}
685	}
686
687	return nil
688}
689
690func (devices *DeviceSet) countDeletedDevices() {
691	for _, info := range devices.Devices {
692		if !info.Deleted {
693			continue
694		}
695		devices.nrDeletedDevices++
696	}
697}
698
699func (devices *DeviceSet) startDeviceDeletionWorker() {
700	// Deferred deletion is not enabled. Don't do anything.
701	if !devices.deferredDelete {
702		return
703	}
704
705	logrus.WithField("storage-driver", "devicemapper").Debug("Worker to cleanup deleted devices started")
706	for range devices.deletionWorkerTicker.C {
707		devices.cleanupDeletedDevices()
708	}
709}
710
// initMetaData brings the in-memory state in line with what is on disk and
// in the thin pool. It holds the DeviceSet lock for the whole sequence:
//
//  1. migrate the legacy single-file metadata, if present;
//  2. read the pool's current transaction ID;
//  3. load every device file from the metadata directory;
//  4. rebuild the device-ID bitmap and count the deleted devices (both
//     depend on step 3 having filled the device table);
//  5. process a pending transaction, if any;
//  6. start the deleted-device cleanup worker in its own goroutine.
//
// The first failing step aborts the sequence and its error is returned.
func (devices *DeviceSet) initMetaData() error {
	devices.Lock()
	defer devices.Unlock()

	if err := devices.migrateOldMetaData(); err != nil {
		return err
	}

	// Only the transaction ID is needed from the pool status here.
	_, transactionID, _, _, _, _, err := devices.poolStatus()
	if err != nil {
		return err
	}

	devices.TransactionID = transactionID

	if err := devices.loadDeviceFilesOnStart(); err != nil {
		return fmt.Errorf("devmapper: Failed to load device files:%v", err)
	}

	devices.constructDeviceIDMap()
	devices.countDeletedDevices()

	if err := devices.processPendingTransaction(); err != nil {
		return err
	}

	// Start a goroutine to cleanup Deleted Devices
	go devices.startDeviceDeletionWorker()
	return nil
}
741
742func (devices *DeviceSet) incNextDeviceID() {
743	// IDs are 24bit, so wrap around
744	devices.NextDeviceID = (devices.NextDeviceID + 1) & maxDeviceID
745}
746
747func (devices *DeviceSet) getNextFreeDeviceID() (int, error) {
748	devices.incNextDeviceID()
749	for i := 0; i <= maxDeviceID; i++ {
750		if devices.isDeviceIDFree(devices.NextDeviceID) {
751			devices.markDeviceIDUsed(devices.NextDeviceID)
752			return devices.NextDeviceID, nil
753		}
754		devices.incNextDeviceID()
755	}
756
757	return 0, fmt.Errorf("devmapper: Unable to find a free device ID")
758}
759
760func (devices *DeviceSet) poolHasFreeSpace() error {
761	if devices.minFreeSpacePercent == 0 {
762		return nil
763	}
764
765	_, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus()
766	if err != nil {
767		return err
768	}
769
770	minFreeData := (dataTotal * uint64(devices.minFreeSpacePercent)) / 100
771	if minFreeData < 1 {
772		minFreeData = 1
773	}
774	dataFree := dataTotal - dataUsed
775	if dataFree < minFreeData {
776		return fmt.Errorf("devmapper: Thin Pool has %v free data blocks which is less than minimum required %v free data blocks. Create more free space in thin pool or use dm.min_free_space option to change behavior", (dataTotal - dataUsed), minFreeData)
777	}
778
779	minFreeMetadata := (metadataTotal * uint64(devices.minFreeSpacePercent)) / 100
780	if minFreeMetadata < 1 {
781		minFreeMetadata = 1
782	}
783
784	metadataFree := metadataTotal - metadataUsed
785	if metadataFree < minFreeMetadata {
786		return fmt.Errorf("devmapper: Thin Pool has %v free metadata blocks which is less than minimum required %v free metadata blocks. Create more free metadata space in thin pool or use dm.min_free_space option to change behavior", (metadataTotal - metadataUsed), minFreeMetadata)
787	}
788
789	return nil
790}
791
// createRegisterDevice creates a new thin device in the pool and registers
// it under hash with the configured base filesystem size. It takes the
// DeviceSet lock itself.
//
// The work is done inside a transaction: a free device ID is reserved, the
// transaction is opened, the device is created in the pool, its metadata is
// registered, and the transaction is closed. Each failure path undoes the
// steps already taken (frees the device ID, deletes the pool device,
// unregisters the metadata) before returning the error.
func (devices *DeviceSet) createRegisterDevice(hash string) (*devInfo, error) {
	devices.Lock()
	defer devices.Unlock()

	deviceID, err := devices.getNextFreeDeviceID()
	if err != nil {
		return nil, err
	}

	logger := logrus.WithField("storage-driver", "devicemapper")

	if err := devices.openTransaction(hash, deviceID); err != nil {
		logger.Debugf("Error opening transaction hash = %s deviceID = %d", hash, deviceID)
		devices.markDeviceIDFree(deviceID)
		return nil, err
	}

	// Retry with a fresh device ID for as long as the pool reports that the
	// chosen ID already exists.
	for {
		if err := devicemapper.CreateDevice(devices.getPoolDevName(), deviceID); err != nil {
			if devicemapper.DeviceIDExists(err) {
				// Device ID already exists. This should not
				// happen. Now we have a mechanism to find
				// a free device ID. So something is not right.
				// Give a warning and continue.
				// The conflicting ID stays marked as used in the bitmap.
				logger.Errorf("Device ID %d exists in pool but it is supposed to be unused", deviceID)
				deviceID, err = devices.getNextFreeDeviceID()
				if err != nil {
					return nil, err
				}
				// Save new device id into transaction
				// NOTE(review): the result of refreshTransaction is not checked here.
				devices.refreshTransaction(deviceID)
				continue
			}
			logger.Debugf("Error creating device: %s", err)
			devices.markDeviceIDFree(deviceID)
			return nil, err
		}
		break
	}

	logger.Debugf("Registering device (id %v) with FS size %v", deviceID, devices.baseFsSize)
	info, err := devices.registerDevice(deviceID, hash, devices.baseFsSize, devices.OpenTransactionID)
	if err != nil {
		// Roll back the pool device; a failure of the rollback itself is ignored.
		_ = devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID)
		devices.markDeviceIDFree(deviceID)
		return nil, err
	}

	if err := devices.closeTransaction(); err != nil {
		// Best-effort rollback of both the metadata and the pool device.
		devices.unregisterDevice(hash)
		devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID)
		devices.markDeviceIDFree(deviceID)
		return nil, err
	}
	return info, nil
}
848
// takeSnapshot creates a snapshot of baseInfo, registered under hash with
// the given size.
//
// It first checks that the pool has enough free space. If the base device
// currently exists in device-mapper it is suspended for the duration of the
// snapshot and resumed on return. With deferred removal enabled, a pending
// deferred removal of the base device is cancelled first and the device is
// deactivated again on return; if the cancellation reports ErrEnxio the
// device is treated as already gone and no suspend is attempted.
//
// NOTE(review): this function does not take devices.Lock() itself;
// presumably callers hold it. Confirm against the call sites.
func (devices *DeviceSet) takeSnapshot(hash string, baseInfo *devInfo, size uint64) error {
	var (
		devinfo *devicemapper.Info
		err     error
	)

	if err = devices.poolHasFreeSpace(); err != nil {
		return err
	}

	if devices.deferredRemove {
		devinfo, err = devicemapper.GetInfoWithDeferred(baseInfo.Name())
		if err != nil {
			return err
		}
		if devinfo != nil && devinfo.DeferredRemove != 0 {
			err = devices.cancelDeferredRemoval(baseInfo)
			if err != nil {
				// If Error is ErrEnxio. Device is probably already gone. Continue.
				if err != devicemapper.ErrEnxio {
					return err
				}
				devinfo = nil
			} else {
				// Runs after the deferred ResumeDevice below (defers run
				// in reverse order), i.e. once the device is resumed.
				defer devices.deactivateDevice(baseInfo)
			}
		}
	} else {
		devinfo, err = devicemapper.GetInfo(baseInfo.Name())
		if err != nil {
			return err
		}
	}

	// Only a device that currently exists needs to be suspended.
	doSuspend := devinfo != nil && devinfo.Exists != 0

	if doSuspend {
		if err = devicemapper.SuspendDevice(baseInfo.Name()); err != nil {
			return err
		}
		defer devicemapper.ResumeDevice(baseInfo.Name())
	}

	return devices.createRegisterSnapDevice(hash, baseInfo, size)
}
894
// createRegisterSnapDevice creates a snapshot of baseInfo in the pool and
// registers it under hash with the given size.
//
// The work is done inside a transaction: a free device ID is reserved, the
// transaction is opened, the snapshot is created, its metadata is
// registered, and the transaction is closed. Each failure path undoes the
// steps already taken (frees the device ID, deletes the pool device,
// unregisters the metadata) before returning the error.
//
// NOTE(review): unlike createRegisterDevice this function does not take
// devices.Lock(); presumably the caller holds it. Confirm against callers.
func (devices *DeviceSet) createRegisterSnapDevice(hash string, baseInfo *devInfo, size uint64) error {
	deviceID, err := devices.getNextFreeDeviceID()
	if err != nil {
		return err
	}

	logger := logrus.WithField("storage-driver", "devicemapper")

	if err := devices.openTransaction(hash, deviceID); err != nil {
		logger.Debugf("Error opening transaction hash = %s deviceID = %d", hash, deviceID)
		devices.markDeviceIDFree(deviceID)
		return err
	}

	// Retry with a fresh device ID for as long as the pool reports that the
	// chosen ID already exists.
	for {
		if err := devicemapper.CreateSnapDeviceRaw(devices.getPoolDevName(), deviceID, baseInfo.DeviceID); err != nil {
			if devicemapper.DeviceIDExists(err) {
				// Device ID already exists. This should not
				// happen. Now we have a mechanism to find
				// a free device ID. So something is not right.
				// Give a warning and continue.
				// The conflicting ID stays marked as used in the bitmap.
				logger.Errorf("Device ID %d exists in pool but it is supposed to be unused", deviceID)
				deviceID, err = devices.getNextFreeDeviceID()
				if err != nil {
					return err
				}
				// Save new device id into transaction
				// NOTE(review): the result of refreshTransaction is not checked here.
				devices.refreshTransaction(deviceID)
				continue
			}
			logger.Debugf("Error creating snap device: %s", err)
			devices.markDeviceIDFree(deviceID)
			return err
		}
		break
	}

	if _, err := devices.registerDevice(deviceID, hash, size, devices.OpenTransactionID); err != nil {
		// Best-effort rollback of the pool device.
		devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID)
		devices.markDeviceIDFree(deviceID)
		logger.Debugf("Error registering device: %s", err)
		return err
	}

	if err := devices.closeTransaction(); err != nil {
		// Best-effort rollback of both the metadata and the pool device.
		devices.unregisterDevice(hash)
		devicemapper.DeleteDevice(devices.getPoolDevName(), deviceID)
		devices.markDeviceIDFree(deviceID)
		return err
	}
	return nil
}
947
948func (devices *DeviceSet) loadMetadata(hash string) *devInfo {
949	info := &devInfo{Hash: hash, devices: devices}
950	logger := logrus.WithField("storage-driver", "devicemapper")
951
952	jsonData, err := ioutil.ReadFile(devices.metadataFile(info))
953	if err != nil {
954		logger.Debugf("Failed to read %s with err: %v", devices.metadataFile(info), err)
955		return nil
956	}
957
958	if err := json.Unmarshal(jsonData, &info); err != nil {
959		logger.Debugf("Failed to unmarshal devInfo from %s with err: %v", devices.metadataFile(info), err)
960		return nil
961	}
962
963	if info.DeviceID > maxDeviceID {
964		logger.Errorf("Ignoring Invalid DeviceId=%d", info.DeviceID)
965		return nil
966	}
967
968	return info
969}
970
971func getDeviceUUID(device string) (string, error) {
972	out, err := exec.Command("blkid", "-s", "UUID", "-o", "value", device).Output()
973	if err != nil {
974		return "", fmt.Errorf("devmapper: Failed to find uuid for device %s:%v", device, err)
975	}
976
977	uuid := strings.TrimSuffix(string(out), "\n")
978	uuid = strings.TrimSpace(uuid)
979	logrus.WithField("storage-driver", "devicemapper").Debugf("UUID for device: %s is:%s", device, uuid)
980	return uuid, nil
981}
982
983func (devices *DeviceSet) getBaseDeviceSize() uint64 {
984	info, _ := devices.lookupDevice("")
985	if info == nil {
986		return 0
987	}
988	return info.Size
989}
990
991func (devices *DeviceSet) getBaseDeviceFS() string {
992	return devices.BaseDeviceFilesystem
993}
994
995func (devices *DeviceSet) verifyBaseDeviceUUIDFS(baseInfo *devInfo) error {
996	devices.Lock()
997	defer devices.Unlock()
998
999	if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil {
1000		return err
1001	}
1002	defer devices.deactivateDevice(baseInfo)
1003
1004	uuid, err := getDeviceUUID(baseInfo.DevName())
1005	if err != nil {
1006		return err
1007	}
1008
1009	if devices.BaseDeviceUUID != uuid {
1010		return fmt.Errorf("devmapper: Current Base Device UUID:%s does not match with stored UUID:%s. Possibly using a different thin pool than last invocation", uuid, devices.BaseDeviceUUID)
1011	}
1012
1013	if devices.BaseDeviceFilesystem == "" {
1014		fsType, err := ProbeFsType(baseInfo.DevName())
1015		if err != nil {
1016			return err
1017		}
1018		if err := devices.saveBaseDeviceFilesystem(fsType); err != nil {
1019			return err
1020		}
1021	}
1022
1023	// If user specified a filesystem using dm.fs option and current
1024	// file system of base image is not same, warn user that dm.fs
1025	// will be ignored.
1026	if devices.BaseDeviceFilesystem != devices.filesystem {
1027		logrus.WithField("storage-driver", "devicemapper").Warnf("Base device already exists and has filesystem %s on it. User specified filesystem %s will be ignored.", devices.BaseDeviceFilesystem, devices.filesystem)
1028		devices.filesystem = devices.BaseDeviceFilesystem
1029	}
1030	return nil
1031}
1032
1033func (devices *DeviceSet) saveBaseDeviceFilesystem(fs string) error {
1034	devices.BaseDeviceFilesystem = fs
1035	return devices.saveDeviceSetMetaData()
1036}
1037
1038func (devices *DeviceSet) saveBaseDeviceUUID(baseInfo *devInfo) error {
1039	devices.Lock()
1040	defer devices.Unlock()
1041
1042	if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil {
1043		return err
1044	}
1045	defer devices.deactivateDevice(baseInfo)
1046
1047	uuid, err := getDeviceUUID(baseInfo.DevName())
1048	if err != nil {
1049		return err
1050	}
1051
1052	devices.BaseDeviceUUID = uuid
1053	return devices.saveDeviceSetMetaData()
1054}
1055
1056func (devices *DeviceSet) createBaseImage() error {
1057	logrus.WithField("storage-driver", "devicemapper").Debug("Initializing base device-mapper thin volume")
1058
1059	// Create initial device
1060	info, err := devices.createRegisterDevice("")
1061	if err != nil {
1062		return err
1063	}
1064
1065	logrus.WithField("storage-driver", "devicemapper").Debug("Creating filesystem on base device-mapper thin volume")
1066
1067	if err := devices.activateDeviceIfNeeded(info, false); err != nil {
1068		return err
1069	}
1070
1071	if err := devices.createFilesystem(info); err != nil {
1072		return err
1073	}
1074
1075	info.Initialized = true
1076	if err := devices.saveMetadata(info); err != nil {
1077		info.Initialized = false
1078		return err
1079	}
1080
1081	if err := devices.saveBaseDeviceUUID(info); err != nil {
1082		return fmt.Errorf("devmapper: Could not query and save base device UUID:%v", err)
1083	}
1084
1085	return nil
1086}
1087
1088// Returns if thin pool device exists or not. If device exists, also makes
1089// sure it is a thin pool device and not some other type of device.
1090func (devices *DeviceSet) thinPoolExists(thinPoolDevice string) (bool, error) {
1091	logrus.WithField("storage-driver", "devicemapper").Debugf("Checking for existence of the pool %s", thinPoolDevice)
1092
1093	info, err := devicemapper.GetInfo(thinPoolDevice)
1094	if err != nil {
1095		return false, fmt.Errorf("devmapper: GetInfo() on device %s failed: %v", thinPoolDevice, err)
1096	}
1097
1098	// Device does not exist.
1099	if info.Exists == 0 {
1100		return false, nil
1101	}
1102
1103	_, _, deviceType, _, err := devicemapper.GetStatus(thinPoolDevice)
1104	if err != nil {
1105		return false, fmt.Errorf("devmapper: GetStatus() on device %s failed: %v", thinPoolDevice, err)
1106	}
1107
1108	if deviceType != "thin-pool" {
1109		return false, fmt.Errorf("devmapper: Device %s is not a thin pool", thinPoolDevice)
1110	}
1111
1112	return true, nil
1113}
1114
1115func (devices *DeviceSet) checkThinPool() error {
1116	_, transactionID, dataUsed, _, _, _, err := devices.poolStatus()
1117	if err != nil {
1118		return err
1119	}
1120	if dataUsed != 0 {
1121		return fmt.Errorf("devmapper: Unable to take ownership of thin-pool (%s) that already has used data blocks",
1122			devices.thinPoolDevice)
1123	}
1124	if transactionID != 0 {
1125		return fmt.Errorf("devmapper: Unable to take ownership of thin-pool (%s) with non-zero transaction ID",
1126			devices.thinPoolDevice)
1127	}
1128	return nil
1129}
1130
1131// Base image is initialized properly. Either save UUID for first time (for
1132// upgrade case or verify UUID.
1133func (devices *DeviceSet) setupVerifyBaseImageUUIDFS(baseInfo *devInfo) error {
1134	// If BaseDeviceUUID is nil (upgrade case), save it and return success.
1135	if devices.BaseDeviceUUID == "" {
1136		if err := devices.saveBaseDeviceUUID(baseInfo); err != nil {
1137			return fmt.Errorf("devmapper: Could not query and save base device UUID:%v", err)
1138		}
1139		return nil
1140	}
1141
1142	if err := devices.verifyBaseDeviceUUIDFS(baseInfo); err != nil {
1143		return fmt.Errorf("devmapper: Base Device UUID and Filesystem verification failed: %v", err)
1144	}
1145
1146	return nil
1147}
1148
1149func (devices *DeviceSet) checkGrowBaseDeviceFS(info *devInfo) error {
1150
1151	if !userBaseSize {
1152		return nil
1153	}
1154
1155	if devices.baseFsSize < devices.getBaseDeviceSize() {
1156		return fmt.Errorf("devmapper: Base device size cannot be smaller than %s", units.HumanSize(float64(devices.getBaseDeviceSize())))
1157	}
1158
1159	if devices.baseFsSize == devices.getBaseDeviceSize() {
1160		return nil
1161	}
1162
1163	info.lock.Lock()
1164	defer info.lock.Unlock()
1165
1166	devices.Lock()
1167	defer devices.Unlock()
1168
1169	info.Size = devices.baseFsSize
1170
1171	if err := devices.saveMetadata(info); err != nil {
1172		// Try to remove unused device
1173		delete(devices.Devices, info.Hash)
1174		return err
1175	}
1176
1177	return devices.growFS(info)
1178}
1179
1180func (devices *DeviceSet) growFS(info *devInfo) error {
1181	if err := devices.activateDeviceIfNeeded(info, false); err != nil {
1182		return fmt.Errorf("Error activating devmapper device: %s", err)
1183	}
1184
1185	defer devices.deactivateDevice(info)
1186
1187	fsMountPoint := "/run/docker/mnt"
1188	if _, err := os.Stat(fsMountPoint); os.IsNotExist(err) {
1189		if err := os.MkdirAll(fsMountPoint, 0700); err != nil {
1190			return err
1191		}
1192		defer os.RemoveAll(fsMountPoint)
1193	}
1194
1195	options := ""
1196	if devices.BaseDeviceFilesystem == "xfs" {
1197		// XFS needs nouuid or it can't mount filesystems with the same fs
1198		options = joinMountOptions(options, "nouuid")
1199	}
1200	options = joinMountOptions(options, devices.mountOptions)
1201
1202	if err := mount.Mount(info.DevName(), fsMountPoint, devices.BaseDeviceFilesystem, options); err != nil {
1203		return fmt.Errorf("Error mounting '%s' on '%s' (fstype='%s' options='%s'): %s\n%v", info.DevName(), fsMountPoint, devices.BaseDeviceFilesystem, options, err, string(dmesg.Dmesg(256)))
1204	}
1205
1206	defer unix.Unmount(fsMountPoint, unix.MNT_DETACH)
1207
1208	switch devices.BaseDeviceFilesystem {
1209	case "ext4":
1210		if out, err := exec.Command("resize2fs", info.DevName()).CombinedOutput(); err != nil {
1211			return fmt.Errorf("Failed to grow rootfs:%v:%s", err, string(out))
1212		}
1213	case "xfs":
1214		if out, err := exec.Command("xfs_growfs", info.DevName()).CombinedOutput(); err != nil {
1215			return fmt.Errorf("Failed to grow rootfs:%v:%s", err, string(out))
1216		}
1217	default:
1218		return fmt.Errorf("Unsupported filesystem type %s", devices.BaseDeviceFilesystem)
1219	}
1220	return nil
1221}
1222
1223func (devices *DeviceSet) setupBaseImage() error {
1224	oldInfo, _ := devices.lookupDeviceWithLock("")
1225
1226	// base image already exists. If it is initialized properly, do UUID
1227	// verification and return. Otherwise remove image and set it up
1228	// fresh.
1229
1230	if oldInfo != nil {
1231		if oldInfo.Initialized && !oldInfo.Deleted {
1232			if err := devices.setupVerifyBaseImageUUIDFS(oldInfo); err != nil {
1233				return err
1234			}
1235			return devices.checkGrowBaseDeviceFS(oldInfo)
1236		}
1237
1238		logrus.WithField("storage-driver", "devicemapper").Debug("Removing uninitialized base image")
1239		// If previous base device is in deferred delete state,
1240		// that needs to be cleaned up first. So don't try
1241		// deferred deletion.
1242		if err := devices.DeleteDevice("", true); err != nil {
1243			return err
1244		}
1245	}
1246
1247	// If we are setting up base image for the first time, make sure
1248	// thin pool is empty.
1249	if devices.thinPoolDevice != "" && oldInfo == nil {
1250		if err := devices.checkThinPool(); err != nil {
1251			return err
1252		}
1253	}
1254
1255	// Create new base image device
1256	return devices.createBaseImage()
1257}
1258
1259func setCloseOnExec(name string) {
1260	fileInfos, _ := ioutil.ReadDir("/proc/self/fd")
1261	for _, i := range fileInfos {
1262		link, _ := os.Readlink(filepath.Join("/proc/self/fd", i.Name()))
1263		if link == name {
1264			fd, err := strconv.Atoi(i.Name())
1265			if err == nil {
1266				unix.CloseOnExec(fd)
1267			}
1268		}
1269	}
1270}
1271
// major extracts the major number from a device number: the 12 bits
// starting at bit 8.
func major(device uint64) uint64 {
	const majorMask = 0xfff
	shifted := device >> 8
	return shifted & majorMask
}
1275
// minor extracts the minor number from a device number: the low 8 bits
// combined with the 12 bits starting at bit 20, which are placed above them.
func minor(device uint64) uint64 {
	low := device & 0xff
	high := (device >> 12) & 0xfff00
	return high | low
}
1279
1280// ResizePool increases the size of the pool.
1281func (devices *DeviceSet) ResizePool(size int64) error {
1282	dirname := devices.loopbackDir()
1283	datafilename := path.Join(dirname, "data")
1284	if len(devices.dataDevice) > 0 {
1285		datafilename = devices.dataDevice
1286	}
1287	metadatafilename := path.Join(dirname, "metadata")
1288	if len(devices.metadataDevice) > 0 {
1289		metadatafilename = devices.metadataDevice
1290	}
1291
1292	datafile, err := os.OpenFile(datafilename, os.O_RDWR, 0)
1293	if datafile == nil {
1294		return err
1295	}
1296	defer datafile.Close()
1297
1298	fi, err := datafile.Stat()
1299	if fi == nil {
1300		return err
1301	}
1302
1303	if fi.Size() > size {
1304		return fmt.Errorf("devmapper: Can't shrink file")
1305	}
1306
1307	dataloopback := loopback.FindLoopDeviceFor(datafile)
1308	if dataloopback == nil {
1309		return fmt.Errorf("devmapper: Unable to find loopback mount for: %s", datafilename)
1310	}
1311	defer dataloopback.Close()
1312
1313	metadatafile, err := os.OpenFile(metadatafilename, os.O_RDWR, 0)
1314	if metadatafile == nil {
1315		return err
1316	}
1317	defer metadatafile.Close()
1318
1319	metadataloopback := loopback.FindLoopDeviceFor(metadatafile)
1320	if metadataloopback == nil {
1321		return fmt.Errorf("devmapper: Unable to find loopback mount for: %s", metadatafilename)
1322	}
1323	defer metadataloopback.Close()
1324
1325	// Grow loopback file
1326	if err := datafile.Truncate(size); err != nil {
1327		return fmt.Errorf("devmapper: Unable to grow loopback file: %s", err)
1328	}
1329
1330	// Reload size for loopback device
1331	if err := loopback.SetCapacity(dataloopback); err != nil {
1332		return fmt.Errorf("Unable to update loopback capacity: %s", err)
1333	}
1334
1335	// Suspend the pool
1336	if err := devicemapper.SuspendDevice(devices.getPoolName()); err != nil {
1337		return fmt.Errorf("devmapper: Unable to suspend pool: %s", err)
1338	}
1339
1340	// Reload with the new block sizes
1341	if err := devicemapper.ReloadPool(devices.getPoolName(), dataloopback, metadataloopback, devices.thinpBlockSize); err != nil {
1342		return fmt.Errorf("devmapper: Unable to reload pool: %s", err)
1343	}
1344
1345	// Resume the pool
1346	if err := devicemapper.ResumeDevice(devices.getPoolName()); err != nil {
1347		return fmt.Errorf("devmapper: Unable to resume pool: %s", err)
1348	}
1349
1350	return nil
1351}
1352
1353func (devices *DeviceSet) loadTransactionMetaData() error {
1354	jsonData, err := ioutil.ReadFile(devices.transactionMetaFile())
1355	if err != nil {
1356		// There is no active transaction. This will be the case
1357		// during upgrade.
1358		if os.IsNotExist(err) {
1359			devices.OpenTransactionID = devices.TransactionID
1360			return nil
1361		}
1362		return err
1363	}
1364
1365	json.Unmarshal(jsonData, &devices.transaction)
1366	return nil
1367}
1368
1369func (devices *DeviceSet) saveTransactionMetaData() error {
1370	jsonData, err := json.Marshal(&devices.transaction)
1371	if err != nil {
1372		return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err)
1373	}
1374
1375	return devices.writeMetaFile(jsonData, devices.transactionMetaFile())
1376}
1377
1378func (devices *DeviceSet) removeTransactionMetaData() error {
1379	return os.RemoveAll(devices.transactionMetaFile())
1380}
1381
1382func (devices *DeviceSet) rollbackTransaction() error {
1383	logger := logrus.WithField("storage-driver", "devicemapper")
1384
1385	logger.Debugf("Rolling back open transaction: TransactionID=%d hash=%s device_id=%d", devices.OpenTransactionID, devices.DeviceIDHash, devices.DeviceID)
1386
1387	// A device id might have already been deleted before transaction
1388	// closed. In that case this call will fail. Just leave a message
1389	// in case of failure.
1390	if err := devicemapper.DeleteDevice(devices.getPoolDevName(), devices.DeviceID); err != nil {
1391		logger.Errorf("Unable to delete device: %s", err)
1392	}
1393
1394	dinfo := &devInfo{Hash: devices.DeviceIDHash}
1395	if err := devices.removeMetadata(dinfo); err != nil {
1396		logger.Errorf("Unable to remove metadata: %s", err)
1397	} else {
1398		devices.markDeviceIDFree(devices.DeviceID)
1399	}
1400
1401	if err := devices.removeTransactionMetaData(); err != nil {
1402		logger.Errorf("Unable to remove transaction meta file %s: %s", devices.transactionMetaFile(), err)
1403	}
1404
1405	return nil
1406}
1407
1408func (devices *DeviceSet) processPendingTransaction() error {
1409	if err := devices.loadTransactionMetaData(); err != nil {
1410		return err
1411	}
1412
1413	// If there was open transaction but pool transaction ID is same
1414	// as open transaction ID, nothing to roll back.
1415	if devices.TransactionID == devices.OpenTransactionID {
1416		return nil
1417	}
1418
1419	// If open transaction ID is less than pool transaction ID, something
1420	// is wrong. Bail out.
1421	if devices.OpenTransactionID < devices.TransactionID {
1422		logrus.WithField("storage-driver", "devicemapper").Errorf("Open Transaction id %d is less than pool transaction id %d", devices.OpenTransactionID, devices.TransactionID)
1423		return nil
1424	}
1425
1426	// Pool transaction ID is not same as open transaction. There is
1427	// a transaction which was not completed.
1428	if err := devices.rollbackTransaction(); err != nil {
1429		return fmt.Errorf("devmapper: Rolling back open transaction failed: %s", err)
1430	}
1431
1432	devices.OpenTransactionID = devices.TransactionID
1433	return nil
1434}
1435
1436func (devices *DeviceSet) loadDeviceSetMetaData() error {
1437	jsonData, err := ioutil.ReadFile(devices.deviceSetMetaFile())
1438	if err != nil {
1439		// For backward compatibility return success if file does
1440		// not exist.
1441		if os.IsNotExist(err) {
1442			return nil
1443		}
1444		return err
1445	}
1446
1447	return json.Unmarshal(jsonData, devices)
1448}
1449
1450func (devices *DeviceSet) saveDeviceSetMetaData() error {
1451	jsonData, err := json.Marshal(devices)
1452	if err != nil {
1453		return fmt.Errorf("devmapper: Error encoding metadata to json: %s", err)
1454	}
1455
1456	return devices.writeMetaFile(jsonData, devices.deviceSetMetaFile())
1457}
1458
1459func (devices *DeviceSet) openTransaction(hash string, DeviceID int) error {
1460	devices.allocateTransactionID()
1461	devices.DeviceIDHash = hash
1462	devices.DeviceID = DeviceID
1463	if err := devices.saveTransactionMetaData(); err != nil {
1464		return fmt.Errorf("devmapper: Error saving transaction metadata: %s", err)
1465	}
1466	return nil
1467}
1468
1469func (devices *DeviceSet) refreshTransaction(DeviceID int) error {
1470	devices.DeviceID = DeviceID
1471	if err := devices.saveTransactionMetaData(); err != nil {
1472		return fmt.Errorf("devmapper: Error saving transaction metadata: %s", err)
1473	}
1474	return nil
1475}
1476
1477func (devices *DeviceSet) closeTransaction() error {
1478	if err := devices.updatePoolTransactionID(); err != nil {
1479		logrus.WithField("storage-driver", "devicemapper").Debug("Failed to close Transaction")
1480		return err
1481	}
1482	return nil
1483}
1484
1485func determineDriverCapabilities(version string) error {
1486	// Kernel driver version >= 4.27.0 support deferred removal
1487
1488	logrus.WithField("storage-driver", "devicemapper").Debugf("kernel dm driver version is %s", version)
1489
1490	versionSplit := strings.Split(version, ".")
1491	major, err := strconv.Atoi(versionSplit[0])
1492	if err != nil {
1493		return graphdriver.ErrNotSupported
1494	}
1495
1496	if major > 4 {
1497		driverDeferredRemovalSupport = true
1498		return nil
1499	}
1500
1501	if major < 4 {
1502		return nil
1503	}
1504
1505	minor, err := strconv.Atoi(versionSplit[1])
1506	if err != nil {
1507		return graphdriver.ErrNotSupported
1508	}
1509
1510	/*
1511	 * If major is 4 and minor is 27, then there is no need to
1512	 * check for patch level as it can not be less than 0.
1513	 */
1514	if minor >= 27 {
1515		driverDeferredRemovalSupport = true
1516		return nil
1517	}
1518
1519	return nil
1520}
1521
1522// Determine the major and minor number of loopback device
1523func getDeviceMajorMinor(file *os.File) (uint64, uint64, error) {
1524	var stat unix.Stat_t
1525	err := unix.Stat(file.Name(), &stat)
1526	if err != nil {
1527		return 0, 0, err
1528	}
1529
1530	dev := stat.Rdev
1531	majorNum := major(dev)
1532	minorNum := minor(dev)
1533
1534	logrus.WithField("storage-driver", "devicemapper").Debugf("Major:Minor for device: %s is:%v:%v", file.Name(), majorNum, minorNum)
1535	return majorNum, minorNum, nil
1536}
1537
1538// Given a file which is backing file of a loop back device, find the
1539// loopback device name and its major/minor number.
1540func getLoopFileDeviceMajMin(filename string) (string, uint64, uint64, error) {
1541	file, err := os.Open(filename)
1542	if err != nil {
1543		logrus.WithField("storage-driver", "devicemapper").Debugf("Failed to open file %s", filename)
1544		return "", 0, 0, err
1545	}
1546
1547	defer file.Close()
1548	loopbackDevice := loopback.FindLoopDeviceFor(file)
1549	if loopbackDevice == nil {
1550		return "", 0, 0, fmt.Errorf("devmapper: Unable to find loopback mount for: %s", filename)
1551	}
1552	defer loopbackDevice.Close()
1553
1554	Major, Minor, err := getDeviceMajorMinor(loopbackDevice)
1555	if err != nil {
1556		return "", 0, 0, err
1557	}
1558	return loopbackDevice.Name(), Major, Minor, nil
1559}
1560
1561// Get the major/minor numbers of thin pool data and metadata devices
1562func (devices *DeviceSet) getThinPoolDataMetaMajMin() (uint64, uint64, uint64, uint64, error) {
1563	var params, poolDataMajMin, poolMetadataMajMin string
1564
1565	_, _, _, params, err := devicemapper.GetTable(devices.getPoolName())
1566	if err != nil {
1567		return 0, 0, 0, 0, err
1568	}
1569
1570	if _, err = fmt.Sscanf(params, "%s %s", &poolMetadataMajMin, &poolDataMajMin); err != nil {
1571		return 0, 0, 0, 0, err
1572	}
1573
1574	logrus.WithField("storage-driver", "devicemapper").Debugf("poolDataMajMin=%s poolMetaMajMin=%s\n", poolDataMajMin, poolMetadataMajMin)
1575
1576	poolDataMajMinorSplit := strings.Split(poolDataMajMin, ":")
1577	poolDataMajor, err := strconv.ParseUint(poolDataMajMinorSplit[0], 10, 32)
1578	if err != nil {
1579		return 0, 0, 0, 0, err
1580	}
1581
1582	poolDataMinor, err := strconv.ParseUint(poolDataMajMinorSplit[1], 10, 32)
1583	if err != nil {
1584		return 0, 0, 0, 0, err
1585	}
1586
1587	poolMetadataMajMinorSplit := strings.Split(poolMetadataMajMin, ":")
1588	poolMetadataMajor, err := strconv.ParseUint(poolMetadataMajMinorSplit[0], 10, 32)
1589	if err != nil {
1590		return 0, 0, 0, 0, err
1591	}
1592
1593	poolMetadataMinor, err := strconv.ParseUint(poolMetadataMajMinorSplit[1], 10, 32)
1594	if err != nil {
1595		return 0, 0, 0, 0, err
1596	}
1597
1598	return poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, nil
1599}
1600
1601func (devices *DeviceSet) loadThinPoolLoopBackInfo() error {
1602	poolDataMajor, poolDataMinor, poolMetadataMajor, poolMetadataMinor, err := devices.getThinPoolDataMetaMajMin()
1603	if err != nil {
1604		return err
1605	}
1606
1607	dirname := devices.loopbackDir()
1608
1609	// data device has not been passed in. So there should be a data file
1610	// which is being mounted as loop device.
1611	if devices.dataDevice == "" {
1612		datafilename := path.Join(dirname, "data")
1613		dataLoopDevice, dataMajor, dataMinor, err := getLoopFileDeviceMajMin(datafilename)
1614		if err != nil {
1615			return err
1616		}
1617
1618		// Compare the two
1619		if poolDataMajor == dataMajor && poolDataMinor == dataMinor {
1620			devices.dataDevice = dataLoopDevice
1621			devices.dataLoopFile = datafilename
1622		}
1623
1624	}
1625
1626	// metadata device has not been passed in. So there should be a
1627	// metadata file which is being mounted as loop device.
1628	if devices.metadataDevice == "" {
1629		metadatafilename := path.Join(dirname, "metadata")
1630		metadataLoopDevice, metadataMajor, metadataMinor, err := getLoopFileDeviceMajMin(metadatafilename)
1631		if err != nil {
1632			return err
1633		}
1634		if poolMetadataMajor == metadataMajor && poolMetadataMinor == metadataMinor {
1635			devices.metadataDevice = metadataLoopDevice
1636			devices.metadataLoopFile = metadatafilename
1637		}
1638	}
1639
1640	return nil
1641}
1642
1643func (devices *DeviceSet) enableDeferredRemovalDeletion() error {
1644
1645	// If user asked for deferred removal then check both libdm library
1646	// and kernel driver support deferred removal otherwise error out.
1647	if enableDeferredRemoval {
1648		if !driverDeferredRemovalSupport {
1649			return fmt.Errorf("devmapper: Deferred removal can not be enabled as kernel does not support it")
1650		}
1651		if !devicemapper.LibraryDeferredRemovalSupport {
1652			return fmt.Errorf("devmapper: Deferred removal can not be enabled as libdm does not support it")
1653		}
1654		logrus.WithField("storage-driver", "devicemapper").Debug("Deferred removal support enabled.")
1655		devices.deferredRemove = true
1656	}
1657
1658	if enableDeferredDeletion {
1659		if !devices.deferredRemove {
1660			return fmt.Errorf("devmapper: Deferred deletion can not be enabled as deferred removal is not enabled. Enable deferred removal using --storage-opt dm.use_deferred_removal=true parameter")
1661		}
1662		logrus.WithField("storage-driver", "devicemapper").Debug("Deferred deletion support enabled.")
1663		devices.deferredDelete = true
1664	}
1665	return nil
1666}
1667
// initDevmapper prepares the DeviceSet for use. In order, it:
//
//   - applies the deferred removal/deletion settings;
//   - checks udev sync support (fatal unless overrideUdevSyncCheck is set);
//   - creates the root directory and the metadata directory;
//   - sets up direct-lvm when an lvm setup config was given;
//   - derives the device prefix from the root directory's device and inode;
//   - creates the thin pool if it does not exist and no thin pool device was
//     configured, attaching loopback files where no data/metadata device was
//     given;
//   - loads loopback information for a pre-existing pool;
//   - loads metadata (unless loopback files were just created) and the device
//     set metadata;
//   - sets up the base image when doInit is true.
//
// retErr is a named result so that the cleanup deferred after a successful
// CreatePool can see whether a later step failed.
func (devices *DeviceSet) initDevmapper(doInit bool) (retErr error) {
	if err := devices.enableDeferredRemovalDeletion(); err != nil {
		return err
	}

	logger := logrus.WithField("storage-driver", "devicemapper")

	// https://github.com/docker/docker/issues/4036
	if supported := devicemapper.UdevSetSyncSupport(true); !supported {
		if dockerversion.IAmStatic == "true" {
			logger.Error("Udev sync is not supported. This will lead to data loss and unexpected behavior. Install a dynamic binary to use devicemapper or select a different storage driver. For more information, see https://docs.docker.com/engine/reference/commandline/dockerd/#storage-driver-options")
		} else {
			logger.Error("Udev sync is not supported. This will lead to data loss and unexpected behavior. Install a more recent version of libdevmapper or select a different storage driver. For more information, see https://docs.docker.com/engine/reference/commandline/dockerd/#storage-driver-options")
		}

		if !devices.overrideUdevSyncCheck {
			return graphdriver.ErrNotSupported
		}
	}

	// Create the root dir of the devmapper driver with ownership matching
	// this daemon's remapped root uid/gid so containers can start properly.
	uid, gid, err := idtools.GetRootUIDGID(devices.uidMaps, devices.gidMaps)
	if err != nil {
		return err
	}
	if err := idtools.MkdirAndChown(devices.root, 0700, idtools.Identity{UID: uid, GID: gid}); err != nil {
		return err
	}
	if err := os.MkdirAll(devices.metadataDir(), 0700); err != nil {
		return err
	}

	prevSetupConfig, err := readLVMConfig(devices.root)
	if err != nil {
		return err
	}

	// A non-empty lvm setup config requests direct-lvm mode.
	if !reflect.DeepEqual(devices.lvmSetupConfig, directLVMConfig{}) {
		if devices.thinPoolDevice != "" {
			return errors.New("cannot setup direct-lvm when `dm.thinpooldev` is also specified")
		}

		// Only set up when the config differs from the stored one; changing
		// a previously stored (non-empty) config is rejected.
		if !reflect.DeepEqual(prevSetupConfig, devices.lvmSetupConfig) {
			if !reflect.DeepEqual(prevSetupConfig, directLVMConfig{}) {
				return errors.New("changing direct-lvm config is not supported")
			}
			logger.WithField("direct-lvm-config", devices.lvmSetupConfig).Debugf("Setting up direct lvm mode")
			if err := verifyBlockDevice(devices.lvmSetupConfig.Device, lvmSetupConfigForce); err != nil {
				return err
			}
			if err := setupDirectLVM(devices.lvmSetupConfig); err != nil {
				return err
			}
			if err := writeLVMConfig(devices.root, devices.lvmSetupConfig); err != nil {
				return err
			}
		}
		devices.thinPoolDevice = "docker-thinpool"
		logger.Debugf("Setting dm.thinpooldev to %q", devices.thinPoolDevice)
	}

	// Set the device prefix from the device id and inode of the docker root dir
	var st unix.Stat_t
	if err := unix.Stat(devices.root, &st); err != nil {
		return fmt.Errorf("devmapper: Error looking up dir %s: %s", devices.root, err)
	}
	// "reg-" stands for "regular file".
	// In the future we might use "dev-" for "device file", etc.
	// docker-maj,min[-inode] stands for:
	//	- Managed by docker
	//	- The target of this device is at major <maj> and minor <min>
	//	- If <inode> is defined, use that file inside the device as a loopback image. Otherwise use the device itself.
	devices.devicePrefix = fmt.Sprintf("docker-%d:%d-%d", major(st.Dev), minor(st.Dev), st.Ino)
	logger.Debugf("Generated prefix: %s", devices.devicePrefix)

	// Check for the existence of the thin-pool device
	poolExists, err := devices.thinPoolExists(devices.getPoolName())
	if err != nil {
		return err
	}

	// It seems libdevmapper opens this without O_CLOEXEC, and go exec will not close files
	// that are not Close-on-exec,
	// so we add this badhack to make sure it closes itself
	setCloseOnExec("/dev/mapper/control")

	// Make sure the sparse images exist in <root>/devicemapper/data and
	// <root>/devicemapper/metadata

	// createdLoopback becomes true when a data or metadata image did not
	// exist before this call; in that case loading metadata is skipped below.
	createdLoopback := false

	// If the pool doesn't exist, create it
	if !poolExists && devices.thinPoolDevice == "" {
		logger.Debug("Pool doesn't exist. Creating it.")

		var (
			dataFile     *os.File
			metadataFile *os.File
		)

		if devices.dataDevice == "" {
			// Make sure the sparse images exist in <root>/devicemapper/data

			hasData := devices.hasImage("data")

			if !doInit && !hasData {
				return errors.New("loopback data file not found")
			}

			if !hasData {
				createdLoopback = true
			}

			data, err := devices.ensureImage("data", devices.dataLoopbackSize)
			if err != nil {
				logger.Debugf("Error device ensureImage (data): %s", err)
				return err
			}

			dataFile, err = loopback.AttachLoopDevice(data)
			if err != nil {
				return err
			}
			devices.dataLoopFile = data
			devices.dataDevice = dataFile.Name()
		} else {
			dataFile, err = os.OpenFile(devices.dataDevice, os.O_RDWR, 0600)
			if err != nil {
				return err
			}
		}
		defer dataFile.Close()

		if devices.metadataDevice == "" {
			// Make sure the sparse images exist in <root>/devicemapper/metadata

			hasMetadata := devices.hasImage("metadata")

			if !doInit && !hasMetadata {
				return errors.New("loopback metadata file not found")
			}

			if !hasMetadata {
				createdLoopback = true
			}

			metadata, err := devices.ensureImage("metadata", devices.metaDataLoopbackSize)
			if err != nil {
				logger.Debugf("Error device ensureImage (metadata): %s", err)
				return err
			}

			metadataFile, err = loopback.AttachLoopDevice(metadata)
			if err != nil {
				return err
			}
			devices.metadataLoopFile = metadata
			devices.metadataDevice = metadataFile.Name()
		} else {
			metadataFile, err = os.OpenFile(devices.metadataDevice, os.O_RDWR, 0600)
			if err != nil {
				return err
			}
		}
		defer metadataFile.Close()

		if err := devicemapper.CreatePool(devices.getPoolName(), dataFile, metadataFile, devices.thinpBlockSize); err != nil {
			return err
		}
		// The pool now exists; if any later step fails, deactivate it again.
		// The assignment below targets the function-level err, not retErr,
		// so the error returned to the caller is left unchanged.
		defer func() {
			if retErr != nil {
				err = devices.deactivatePool()
				if err != nil {
					logger.Warnf("Failed to deactivatePool: %v", err)
				}
			}
		}()
	}

	// Pool already exists and caller did not pass us a pool. That means
	// we probably created pool earlier and could not remove it as some
	// containers were still using it. Detect some of the properties of
	// pool, like is it using loop devices.
	if poolExists && devices.thinPoolDevice == "" {
		if err := devices.loadThinPoolLoopBackInfo(); err != nil {
			logger.Debugf("Failed to load thin pool loopback device information:%v", err)
			return err
		}
	}

	// If we didn't just create the data or metadata image, we need to
	// load the transaction id and migrate old metadata
	if !createdLoopback {
		if err := devices.initMetaData(); err != nil {
			return err
		}
	}

	if devices.thinPoolDevice == "" {
		if devices.metadataLoopFile != "" || devices.dataLoopFile != "" {
			logger.Warn("Usage of loopback devices is strongly discouraged for production use. Please use `--storage-opt dm.thinpooldev` or use `man dockerd` to refer to dm.thinpooldev section.")
		}
	}

	// Right now this loads only NextDeviceID. If there is more metadata
	// down the line, we might have to move it earlier.
	if err := devices.loadDeviceSetMetaData(); err != nil {
		return err
	}

	// Setup the base image
	if doInit {
		if err := devices.setupBaseImage(); err != nil {
			logger.Debugf("Error device setupBaseImage: %s", err)
			return err
		}
	}

	return nil
}
1889
1890// AddDevice adds a device and registers in the hash.
1891func (devices *DeviceSet) AddDevice(hash, baseHash string, storageOpt map[string]string) error {
1892	logrus.WithField("storage-driver", "devicemapper").Debugf("AddDevice START(hash=%s basehash=%s)", hash, baseHash)
1893	defer logrus.WithField("storage-driver", "devicemapper").Debugf("AddDevice END(hash=%s basehash=%s)", hash, baseHash)
1894
1895	// If a deleted device exists, return error.
1896	baseInfo, err := devices.lookupDeviceWithLock(baseHash)
1897	if err != nil {
1898		return err
1899	}
1900
1901	if baseInfo.Deleted {
1902		return fmt.Errorf("devmapper: Base device %v has been marked for deferred deletion", baseInfo.Hash)
1903	}
1904
1905	baseInfo.lock.Lock()
1906	defer baseInfo.lock.Unlock()
1907
1908	devices.Lock()
1909	defer devices.Unlock()
1910
1911	// Also include deleted devices in case hash of new device is
1912	// same as one of the deleted devices.
1913	if info, _ := devices.lookupDevice(hash); info != nil {
1914		return fmt.Errorf("devmapper: device %s already exists. Deleted=%v", hash, info.Deleted)
1915	}
1916
1917	size, err := devices.parseStorageOpt(storageOpt)
1918	if err != nil {
1919		return err
1920	}
1921
1922	if size == 0 {
1923		size = baseInfo.Size
1924	}
1925
1926	if size < baseInfo.Size {
1927		return fmt.Errorf("devmapper: Container size cannot be smaller than %s", units.HumanSize(float64(baseInfo.Size)))
1928	}
1929
1930	if err := devices.takeSnapshot(hash, baseInfo, size); err != nil {
1931		return err
1932	}
1933
1934	// Grow the container rootfs.
1935	if size > baseInfo.Size {
1936		info, err := devices.lookupDevice(hash)
1937		if err != nil {
1938			return err
1939		}
1940
1941		if err := devices.growFS(info); err != nil {
1942			return err
1943		}
1944	}
1945
1946	return nil
1947}
1948
1949func (devices *DeviceSet) parseStorageOpt(storageOpt map[string]string) (uint64, error) {
1950
1951	// Read size to change the block device size per container.
1952	for key, val := range storageOpt {
1953		key := strings.ToLower(key)
1954		switch key {
1955		case "size":
1956			size, err := units.RAMInBytes(val)
1957			if err != nil {
1958				return 0, err
1959			}
1960			return uint64(size), nil
1961		default:
1962			return 0, fmt.Errorf("Unknown option %s", key)
1963		}
1964	}
1965
1966	return 0, nil
1967}
1968
1969func (devices *DeviceSet) markForDeferredDeletion(info *devInfo) error {
1970	// If device is already in deleted state, there is nothing to be done.
1971	if info.Deleted {
1972		return nil
1973	}
1974
1975	logrus.WithField("storage-driver", "devicemapper").Debugf("Marking device %s for deferred deletion.", info.Hash)
1976
1977	info.Deleted = true
1978
1979	// save device metadata to reflect deleted state.
1980	if err := devices.saveMetadata(info); err != nil {
1981		info.Deleted = false
1982		return err
1983	}
1984
1985	devices.nrDeletedDevices++
1986	return nil
1987}
1988
1989// Should be called with devices.Lock() held.
1990func (devices *DeviceSet) deleteTransaction(info *devInfo, syncDelete bool) error {
1991	if err := devices.openTransaction(info.Hash, info.DeviceID); err != nil {
1992		logrus.WithField("storage-driver", "devicemapper").Debugf("Error opening transaction hash = %s deviceId = %d", "", info.DeviceID)
1993		return err
1994	}
1995
1996	defer devices.closeTransaction()
1997
1998	err := devicemapper.DeleteDevice(devices.getPoolDevName(), info.DeviceID)
1999	if err != nil {
2000		// If syncDelete is true, we want to return error. If deferred
2001		// deletion is not enabled, we return an error. If error is
2002		// something other then EBUSY, return an error.
2003		if syncDelete || !devices.deferredDelete || err != devicemapper.ErrBusy {
2004			logrus.WithField("storage-driver", "devicemapper").Debugf("Error deleting device: %s", err)
2005			return err
2006		}
2007	}
2008
2009	if err == nil {
2010		if err := devices.unregisterDevice(info.Hash); err != nil {
2011			return err
2012		}
2013		// If device was already in deferred delete state that means
2014		// deletion was being tried again later. Reduce the deleted
2015		// device count.
2016		if info.Deleted {
2017			devices.nrDeletedDevices--
2018		}
2019		devices.markDeviceIDFree(info.DeviceID)
2020	} else {
2021		if err := devices.markForDeferredDeletion(info); err != nil {
2022			return err
2023		}
2024	}
2025
2026	return nil
2027}
2028
2029// Issue discard only if device open count is zero.
2030func (devices *DeviceSet) issueDiscard(info *devInfo) error {
2031	logger := logrus.WithField("storage-driver", "devicemapper")
2032	logger.Debugf("issueDiscard START(device: %s).", info.Hash)
2033	defer logger.Debugf("issueDiscard END(device: %s).", info.Hash)
2034	// This is a workaround for the kernel not discarding block so
2035	// on the thin pool when we remove a thinp device, so we do it
2036	// manually.
2037	// Even if device is deferred deleted, activate it and issue
2038	// discards.
2039	if err := devices.activateDeviceIfNeeded(info, true); err != nil {
2040		return err
2041	}
2042
2043	devinfo, err := devicemapper.GetInfo(info.Name())
2044	if err != nil {
2045		return err
2046	}
2047
2048	if devinfo.OpenCount != 0 {
2049		logger.Debugf("Device: %s is in use. OpenCount=%d. Not issuing discards.", info.Hash, devinfo.OpenCount)
2050		return nil
2051	}
2052
2053	if err := devicemapper.BlockDeviceDiscard(info.DevName()); err != nil {
2054		logger.Debugf("Error discarding block on device: %s (ignoring)", err)
2055	}
2056	return nil
2057}
2058
2059// Should be called with devices.Lock() held.
2060func (devices *DeviceSet) deleteDevice(info *devInfo, syncDelete bool) error {
2061	if devices.doBlkDiscard {
2062		devices.issueDiscard(info)
2063	}
2064
2065	// Try to deactivate device in case it is active.
2066	// If deferred removal is enabled and deferred deletion is disabled
2067	// then make sure device is removed synchronously. There have been
2068	// some cases of device being busy for short duration and we would
2069	// rather busy wait for device removal to take care of these cases.
2070	deferredRemove := devices.deferredRemove
2071	if !devices.deferredDelete {
2072		deferredRemove = false
2073	}
2074
2075	if err := devices.deactivateDeviceMode(info, deferredRemove); err != nil {
2076		logrus.WithField("storage-driver", "devicemapper").Debugf("Error deactivating device: %s", err)
2077		return err
2078	}
2079
2080	return devices.deleteTransaction(info, syncDelete)
2081}
2082
2083// DeleteDevice will return success if device has been marked for deferred
2084// removal. If one wants to override that and want DeleteDevice() to fail if
2085// device was busy and could not be deleted, set syncDelete=true.
2086func (devices *DeviceSet) DeleteDevice(hash string, syncDelete bool) error {
2087	logrus.WithField("storage-driver", "devicemapper").Debugf("DeleteDevice START(hash=%v syncDelete=%v)", hash, syncDelete)
2088	defer logrus.WithField("storage-driver", "devicemapper").Debugf("DeleteDevice END(hash=%v syncDelete=%v)", hash, syncDelete)
2089	info, err := devices.lookupDeviceWithLock(hash)
2090	if err != nil {
2091		return err
2092	}
2093
2094	info.lock.Lock()
2095	defer info.lock.Unlock()
2096
2097	devices.Lock()
2098	defer devices.Unlock()
2099
2100	return devices.deleteDevice(info, syncDelete)
2101}
2102
2103func (devices *DeviceSet) deactivatePool() error {
2104	logrus.WithField("storage-driver", "devicemapper").Debug("deactivatePool() START")
2105	defer logrus.WithField("storage-driver", "devicemapper").Debug("deactivatePool() END")
2106	devname := devices.getPoolDevName()
2107
2108	devinfo, err := devicemapper.GetInfo(devname)
2109	if err != nil {
2110		return err
2111	}
2112
2113	if devinfo.Exists == 0 {
2114		return nil
2115	}
2116	if err := devicemapper.RemoveDevice(devname); err != nil {
2117		return err
2118	}
2119
2120	if d, err := devicemapper.GetDeps(devname); err == nil {
2121		logrus.WithField("storage-driver", "devicemapper").Warnf("device %s still has %d active dependents", devname, d.Count)
2122	}
2123
2124	return nil
2125}
2126
2127func (devices *DeviceSet) deactivateDevice(info *devInfo) error {
2128	return devices.deactivateDeviceMode(info, devices.deferredRemove)
2129}
2130
2131func (devices *DeviceSet) deactivateDeviceMode(info *devInfo, deferredRemove bool) error {
2132	var err error
2133	logrus.WithField("storage-driver", "devicemapper").Debugf("deactivateDevice START(%s)", info.Hash)
2134	defer logrus.WithField("storage-driver", "devicemapper").Debugf("deactivateDevice END(%s)", info.Hash)
2135
2136	devinfo, err := devicemapper.GetInfo(info.Name())
2137	if err != nil {
2138		return err
2139	}
2140
2141	if devinfo.Exists == 0 {
2142		return nil
2143	}
2144
2145	if deferredRemove {
2146		err = devicemapper.RemoveDeviceDeferred(info.Name())
2147	} else {
2148		err = devices.removeDevice(info.Name())
2149	}
2150
2151	// This function's semantics is such that it does not return an
2152	// error if device does not exist. So if device went away by
2153	// the time we actually tried to remove it, do not return error.
2154	if err != devicemapper.ErrEnxio {
2155		return err
2156	}
2157	return nil
2158}
2159
2160// Issues the underlying dm remove operation.
2161func (devices *DeviceSet) removeDevice(devname string) error {
2162	var err error
2163
2164	logrus.WithField("storage-driver", "devicemapper").Debugf("removeDevice START(%s)", devname)
2165	defer logrus.WithField("storage-driver", "devicemapper").Debugf("removeDevice END(%s)", devname)
2166
2167	for i := 0; i < 200; i++ {
2168		err = devicemapper.RemoveDevice(devname)
2169		if err == nil {
2170			break
2171		}
2172		if err != devicemapper.ErrBusy {
2173			return err
2174		}
2175
2176		// If we see EBUSY it may be a transient error,
2177		// sleep a bit a retry a few times.
2178		devices.Unlock()
2179		time.Sleep(100 * time.Millisecond)
2180		devices.Lock()
2181	}
2182
2183	return err
2184}
2185
2186func (devices *DeviceSet) cancelDeferredRemovalIfNeeded(info *devInfo) error {
2187	if !devices.deferredRemove {
2188		return nil
2189	}
2190
2191	logrus.WithField("storage-driver", "devicemapper").Debugf("cancelDeferredRemovalIfNeeded START(%s)", info.Name())
2192	defer logrus.WithField("storage-driver", "devicemapper").Debugf("cancelDeferredRemovalIfNeeded END(%s)", info.Name())
2193
2194	devinfo, err := devicemapper.GetInfoWithDeferred(info.Name())
2195	if err != nil {
2196		return err
2197	}
2198
2199	if devinfo != nil && devinfo.DeferredRemove == 0 {
2200		return nil
2201	}
2202
2203	// Cancel deferred remove
2204	if err := devices.cancelDeferredRemoval(info); err != nil {
2205		// If Error is ErrEnxio. Device is probably already gone. Continue.
2206		if err != devicemapper.ErrEnxio {
2207			return err
2208		}
2209	}
2210	return nil
2211}
2212
2213func (devices *DeviceSet) cancelDeferredRemoval(info *devInfo) error {
2214	logrus.WithField("storage-driver", "devicemapper").Debugf("cancelDeferredRemoval START(%s)", info.Name())
2215	defer logrus.WithField("storage-driver", "devicemapper").Debugf("cancelDeferredRemoval END(%s)", info.Name())
2216
2217	var err error
2218
2219	// Cancel deferred remove
2220	for i := 0; i < 100; i++ {
2221		err = devicemapper.CancelDeferredRemove(info.Name())
2222		if err != nil {
2223			if err == devicemapper.ErrBusy {
2224				// If we see EBUSY it may be a transient error,
2225				// sleep a bit a retry a few times.
2226				devices.Unlock()
2227				time.Sleep(100 * time.Millisecond)
2228				devices.Lock()
2229				continue
2230			}
2231		}
2232		break
2233	}
2234	return err
2235}
2236
2237func (devices *DeviceSet) unmountAndDeactivateAll(dir string) {
2238	logger := logrus.WithField("storage-driver", "devicemapper")
2239
2240	files, err := ioutil.ReadDir(dir)
2241	if err != nil {
2242		logger.Warnf("unmountAndDeactivate: %s", err)
2243		return
2244	}
2245
2246	for _, d := range files {
2247		if !d.IsDir() {
2248			continue
2249		}
2250
2251		name := d.Name()
2252		fullname := path.Join(dir, name)
2253
2254		// We use MNT_DETACH here in case it is still busy in some running
2255		// container. This means it'll go away from the global scope directly,
2256		// and the device will be released when that container dies.
2257		if err := unix.Unmount(fullname, unix.MNT_DETACH); err != nil && err != unix.EINVAL {
2258			logger.Warnf("Shutdown unmounting %s, error: %s", fullname, err)
2259		}
2260
2261		if devInfo, err := devices.lookupDevice(name); err != nil {
2262			logger.Debugf("Shutdown lookup device %s, error: %s", name, err)
2263		} else {
2264			if err := devices.deactivateDevice(devInfo); err != nil {
2265				logger.Debugf("Shutdown deactivate %s, error: %s", devInfo.Hash, err)
2266			}
2267		}
2268	}
2269}
2270
2271// Shutdown shuts down the device by unmounting the root.
2272func (devices *DeviceSet) Shutdown(home string) error {
2273	logger := logrus.WithField("storage-driver", "devicemapper")
2274
2275	logger.Debugf("[deviceset %s] Shutdown()", devices.devicePrefix)
2276	logger.Debugf("Shutting down DeviceSet: %s", devices.root)
2277	defer logger.Debugf("[deviceset %s] Shutdown() END", devices.devicePrefix)
2278
2279	// Stop deletion worker. This should start delivering new events to
2280	// ticker channel. That means no new instance of cleanupDeletedDevice()
2281	// will run after this call. If one instance is already running at
2282	// the time of the call, it must be holding devices.Lock() and
2283	// we will block on this lock till cleanup function exits.
2284	devices.deletionWorkerTicker.Stop()
2285
2286	devices.Lock()
2287	// Save DeviceSet Metadata first. Docker kills all threads if they
2288	// don't finish in certain time. It is possible that Shutdown()
2289	// routine does not finish in time as we loop trying to deactivate
2290	// some devices while these are busy. In that case shutdown() routine
2291	// will be killed and we will not get a chance to save deviceset
2292	// metadata. Hence save this early before trying to deactivate devices.
2293	devices.saveDeviceSetMetaData()
2294	devices.unmountAndDeactivateAll(path.Join(home, "mnt"))
2295	devices.Unlock()
2296
2297	info, _ := devices.lookupDeviceWithLock("")
2298	if info != nil {
2299		info.lock.Lock()
2300		devices.Lock()
2301		if err := devices.deactivateDevice(info); err != nil {
2302			logger.Debugf("Shutdown deactivate base , error: %s", err)
2303		}
2304		devices.Unlock()
2305		info.lock.Unlock()
2306	}
2307
2308	devices.Lock()
2309	if devices.thinPoolDevice == "" {
2310		if err := devices.deactivatePool(); err != nil {
2311			logger.Debugf("Shutdown deactivate pool , error: %s", err)
2312		}
2313	}
2314	devices.Unlock()
2315
2316	return nil
2317}
2318
// Recent XFS changes allow changing the behavior of the filesystem in case of
// errors. When the thin pool gets full and XFS gets an ENOSPC error, it
// currently retries IO infinitely, which can sometimes block the container
// process so that the process can't be killed. With a value of 0, XFS will
// not retry upon error and will instead shut down the filesystem.
2324
2325func (devices *DeviceSet) xfsSetNospaceRetries(info *devInfo) error {
2326	dmDevicePath, err := os.Readlink(info.DevName())
2327	if err != nil {
2328		return fmt.Errorf("devmapper: readlink failed for device %v:%v", info.DevName(), err)
2329	}
2330
2331	dmDeviceName := path.Base(dmDevicePath)
2332	filePath := "/sys/fs/xfs/" + dmDeviceName + "/error/metadata/ENOSPC/max_retries"
2333	maxRetriesFile, err := os.OpenFile(filePath, os.O_WRONLY, 0)
2334	if err != nil {
2335		return fmt.Errorf("devmapper: user specified daemon option dm.xfs_nospace_max_retries but it does not seem to be supported on this system :%v", err)
2336	}
2337	defer maxRetriesFile.Close()
2338
2339	// Set max retries to 0
2340	_, err = maxRetriesFile.WriteString(devices.xfsNospaceRetries)
2341	if err != nil {
2342		return fmt.Errorf("devmapper: Failed to write string %v to file %v:%v", devices.xfsNospaceRetries, filePath, err)
2343	}
2344	return nil
2345}
2346
2347// MountDevice mounts the device if not already mounted.
2348func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error {
2349	info, err := devices.lookupDeviceWithLock(hash)
2350	if err != nil {
2351		return err
2352	}
2353
2354	if info.Deleted {
2355		return fmt.Errorf("devmapper: Can't mount device %v as it has been marked for deferred deletion", info.Hash)
2356	}
2357
2358	info.lock.Lock()
2359	defer info.lock.Unlock()
2360
2361	devices.Lock()
2362	defer devices.Unlock()
2363
2364	if err := devices.activateDeviceIfNeeded(info, false); err != nil {
2365		return fmt.Errorf("devmapper: Error activating devmapper device for '%s': %s", hash, err)
2366	}
2367
2368	fstype, err := ProbeFsType(info.DevName())
2369	if err != nil {
2370		return err
2371	}
2372
2373	options := ""
2374
2375	if fstype == "xfs" {
2376		// XFS needs nouuid or it can't mount filesystems with the same fs
2377		options = joinMountOptions(options, "nouuid")
2378	}
2379
2380	options = joinMountOptions(options, devices.mountOptions)
2381	options = joinMountOptions(options, label.FormatMountLabel("", mountLabel))
2382
2383	if err := mount.Mount(info.DevName(), path, fstype, options); err != nil {
2384		return fmt.Errorf("devmapper: Error mounting '%s' on '%s' (fstype='%s' options='%s'): %s\n%v", info.DevName(), path, fstype, options, err, string(dmesg.Dmesg(256)))
2385	}
2386
2387	if fstype == "xfs" && devices.xfsNospaceRetries != "" {
2388		if err := devices.xfsSetNospaceRetries(info); err != nil {
2389			unix.Unmount(path, unix.MNT_DETACH)
2390			devices.deactivateDevice(info)
2391			return err
2392		}
2393	}
2394
2395	return nil
2396}
2397
2398// UnmountDevice unmounts the device and removes it from hash.
2399func (devices *DeviceSet) UnmountDevice(hash, mountPath string) error {
2400	logger := logrus.WithField("storage-driver", "devicemapper")
2401
2402	logger.Debugf("UnmountDevice START(hash=%s)", hash)
2403	defer logger.Debugf("UnmountDevice END(hash=%s)", hash)
2404
2405	info, err := devices.lookupDeviceWithLock(hash)
2406	if err != nil {
2407		return err
2408	}
2409
2410	info.lock.Lock()
2411	defer info.lock.Unlock()
2412
2413	devices.Lock()
2414	defer devices.Unlock()
2415
2416	logger.Debugf("Unmount(%s)", mountPath)
2417	if err := unix.Unmount(mountPath, unix.MNT_DETACH); err != nil {
2418		return err
2419	}
2420	logger.Debug("Unmount done")
2421
2422	// Remove the mountpoint here. Removing the mountpoint (in newer kernels)
2423	// will cause all other instances of this mount in other mount namespaces
2424	// to be killed (this is an anti-DoS measure that is necessary for things
2425	// like devicemapper). This is necessary to avoid cases where a libdm mount
2426	// that is present in another namespace will cause subsequent RemoveDevice
2427	// operations to fail. We ignore any errors here because this may fail on
2428	// older kernels which don't have
2429	// torvalds/linux@8ed936b5671bfb33d89bc60bdcc7cf0470ba52fe applied.
2430	if err := os.Remove(mountPath); err != nil {
2431		logger.Debugf("error doing a remove on unmounted device %s: %v", mountPath, err)
2432	}
2433
2434	return devices.deactivateDevice(info)
2435}
2436
2437// HasDevice returns true if the device metadata exists.
2438func (devices *DeviceSet) HasDevice(hash string) bool {
2439	info, _ := devices.lookupDeviceWithLock(hash)
2440	return info != nil
2441}
2442
2443// List returns a list of device ids.
2444func (devices *DeviceSet) List() []string {
2445	devices.Lock()
2446	defer devices.Unlock()
2447
2448	ids := make([]string, len(devices.Devices))
2449	i := 0
2450	for k := range devices.Devices {
2451		ids[i] = k
2452		i++
2453	}
2454	return ids
2455}
2456
2457func (devices *DeviceSet) deviceStatus(devName string) (sizeInSectors, mappedSectors, highestMappedSector uint64, err error) {
2458	var params string
2459	_, sizeInSectors, _, params, err = devicemapper.GetStatus(devName)
2460	if err != nil {
2461		return
2462	}
2463	if _, err = fmt.Sscanf(params, "%d %d", &mappedSectors, &highestMappedSector); err == nil {
2464		return
2465	}
2466	return
2467}
2468
2469// GetDeviceStatus provides size, mapped sectors
2470func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) {
2471	info, err := devices.lookupDeviceWithLock(hash)
2472	if err != nil {
2473		return nil, err
2474	}
2475
2476	info.lock.Lock()
2477	defer info.lock.Unlock()
2478
2479	devices.Lock()
2480	defer devices.Unlock()
2481
2482	status := &DevStatus{
2483		DeviceID:      info.DeviceID,
2484		Size:          info.Size,
2485		TransactionID: info.TransactionID,
2486	}
2487
2488	if err := devices.activateDeviceIfNeeded(info, false); err != nil {
2489		return nil, fmt.Errorf("devmapper: Error activating devmapper device for '%s': %s", hash, err)
2490	}
2491
2492	sizeInSectors, mappedSectors, highestMappedSector, err := devices.deviceStatus(info.DevName())
2493
2494	if err != nil {
2495		return nil, err
2496	}
2497
2498	status.SizeInSectors = sizeInSectors
2499	status.MappedSectors = mappedSectors
2500	status.HighestMappedSector = highestMappedSector
2501
2502	return status, nil
2503}
2504
2505func (devices *DeviceSet) poolStatus() (totalSizeInSectors, transactionID, dataUsed, dataTotal, metadataUsed, metadataTotal uint64, err error) {
2506	var params string
2507	if _, totalSizeInSectors, _, params, err = devicemapper.GetStatus(devices.getPoolName()); err == nil {
2508		_, err = fmt.Sscanf(params, "%d %d/%d %d/%d", &transactionID, &metadataUsed, &metadataTotal, &dataUsed, &dataTotal)
2509	}
2510	return
2511}
2512
2513// DataDevicePath returns the path to the data storage for this deviceset,
2514// regardless of loopback or block device
2515func (devices *DeviceSet) DataDevicePath() string {
2516	return devices.dataDevice
2517}
2518
2519// MetadataDevicePath returns the path to the metadata storage for this deviceset,
2520// regardless of loopback or block device
2521func (devices *DeviceSet) MetadataDevicePath() string {
2522	return devices.metadataDevice
2523}
2524
2525func (devices *DeviceSet) getUnderlyingAvailableSpace(loopFile string) (uint64, error) {
2526	buf := new(unix.Statfs_t)
2527	if err := unix.Statfs(loopFile, buf); err != nil {
2528		logrus.WithField("storage-driver", "devicemapper").Warnf("Couldn't stat loopfile filesystem %v: %v", loopFile, err)
2529		return 0, err
2530	}
2531	return buf.Bfree * uint64(buf.Bsize), nil
2532}
2533
2534func (devices *DeviceSet) isRealFile(loopFile string) (bool, error) {
2535	if loopFile != "" {
2536		fi, err := os.Stat(loopFile)
2537		if err != nil {
2538			logrus.WithField("storage-driver", "devicemapper").Warnf("Couldn't stat loopfile %v: %v", loopFile, err)
2539			return false, err
2540		}
2541		return fi.Mode().IsRegular(), nil
2542	}
2543	return false, nil
2544}
2545
2546// Status returns the current status of this deviceset
2547func (devices *DeviceSet) Status() *Status {
2548	devices.Lock()
2549	defer devices.Unlock()
2550
2551	status := &Status{}
2552
2553	status.PoolName = devices.getPoolName()
2554	status.DataFile = devices.DataDevicePath()
2555	status.DataLoopback = devices.dataLoopFile
2556	status.MetadataFile = devices.MetadataDevicePath()
2557	status.MetadataLoopback = devices.metadataLoopFile
2558	status.UdevSyncSupported = devicemapper.UdevSyncSupported()
2559	status.DeferredRemoveEnabled = devices.deferredRemove
2560	status.DeferredDeleteEnabled = devices.deferredDelete
2561	status.DeferredDeletedDeviceCount = devices.nrDeletedDevices
2562	status.BaseDeviceSize = devices.getBaseDeviceSize()
2563	status.BaseDeviceFS = devices.getBaseDeviceFS()
2564
2565	totalSizeInSectors, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus()
2566	if err == nil {
2567		// Convert from blocks to bytes
2568		blockSizeInSectors := totalSizeInSectors / dataTotal
2569
2570		status.Data.Used = dataUsed * blockSizeInSectors * 512
2571		status.Data.Total = dataTotal * blockSizeInSectors * 512
2572		status.Data.Available = status.Data.Total - status.Data.Used
2573
2574		// metadata blocks are always 4k
2575		status.Metadata.Used = metadataUsed * 4096
2576		status.Metadata.Total = metadataTotal * 4096
2577		status.Metadata.Available = status.Metadata.Total - status.Metadata.Used
2578
2579		status.SectorSize = blockSizeInSectors * 512
2580
2581		if check, _ := devices.isRealFile(devices.dataLoopFile); check {
2582			actualSpace, err := devices.getUnderlyingAvailableSpace(devices.dataLoopFile)
2583			if err == nil && actualSpace < status.Data.Available {
2584				status.Data.Available = actualSpace
2585			}
2586		}
2587
2588		if check, _ := devices.isRealFile(devices.metadataLoopFile); check {
2589			actualSpace, err := devices.getUnderlyingAvailableSpace(devices.metadataLoopFile)
2590			if err == nil && actualSpace < status.Metadata.Available {
2591				status.Metadata.Available = actualSpace
2592			}
2593		}
2594
2595		minFreeData := (dataTotal * uint64(devices.minFreeSpacePercent)) / 100
2596		status.MinFreeSpace = minFreeData * blockSizeInSectors * 512
2597	}
2598
2599	return status
2600}
2601
2602// Status returns the current status of this deviceset
2603func (devices *DeviceSet) exportDeviceMetadata(hash string) (*deviceMetadata, error) {
2604	info, err := devices.lookupDeviceWithLock(hash)
2605	if err != nil {
2606		return nil, err
2607	}
2608
2609	info.lock.Lock()
2610	defer info.lock.Unlock()
2611
2612	metadata := &deviceMetadata{info.DeviceID, info.Size, info.Name()}
2613	return metadata, nil
2614}
2615
2616// NewDeviceSet creates the device set based on the options provided.
2617func NewDeviceSet(root string, doInit bool, options []string, uidMaps, gidMaps []idtools.IDMap) (*DeviceSet, error) {
2618	devicemapper.SetDevDir("/dev")
2619
2620	devices := &DeviceSet{
2621		root:                  root,
2622		metaData:              metaData{Devices: make(map[string]*devInfo)},
2623		dataLoopbackSize:      defaultDataLoopbackSize,
2624		metaDataLoopbackSize:  defaultMetaDataLoopbackSize,
2625		baseFsSize:            defaultBaseFsSize,
2626		overrideUdevSyncCheck: defaultUdevSyncOverride,
2627		doBlkDiscard:          true,
2628		thinpBlockSize:        defaultThinpBlockSize,
2629		deviceIDMap:           make([]byte, deviceIDMapSz),
2630		deletionWorkerTicker:  time.NewTicker(time.Second * 30),
2631		uidMaps:               uidMaps,
2632		gidMaps:               gidMaps,
2633		minFreeSpacePercent:   defaultMinFreeSpacePercent,
2634	}
2635
2636	version, err := devicemapper.GetDriverVersion()
2637	if err != nil {
2638		// Can't even get driver version, assume not supported
2639		return nil, graphdriver.ErrNotSupported
2640	}
2641
2642	if err := determineDriverCapabilities(version); err != nil {
2643		return nil, graphdriver.ErrNotSupported
2644	}
2645
2646	if driverDeferredRemovalSupport && devicemapper.LibraryDeferredRemovalSupport {
2647		// enable deferred stuff by default
2648		enableDeferredDeletion = true
2649		enableDeferredRemoval = true
2650	}
2651
2652	foundBlkDiscard := false
2653	var lvmSetupConfig directLVMConfig
2654	for _, option := range options {
2655		key, val, err := parsers.ParseKeyValueOpt(option)
2656		if err != nil {
2657			return nil, err
2658		}
2659		key = strings.ToLower(key)
2660		switch key {
2661		case "dm.basesize":
2662			size, err := units.RAMInBytes(val)
2663			if err != nil {
2664				return nil, err
2665			}
2666			userBaseSize = true
2667			devices.baseFsSize = uint64(size)
2668		case "dm.loopdatasize":
2669			size, err := units.RAMInBytes(val)
2670			if err != nil {
2671				return nil, err
2672			}
2673			devices.dataLoopbackSize = size
2674		case "dm.loopmetadatasize":
2675			size, err := units.RAMInBytes(val)
2676			if err != nil {
2677				return nil, err
2678			}
2679			devices.metaDataLoopbackSize = size
2680		case "dm.fs":
2681			if val != "ext4" && val != "xfs" {
2682				return nil, fmt.Errorf("devmapper: Unsupported filesystem %s", val)
2683			}
2684			devices.filesystem = val
2685		case "dm.mkfsarg":
2686			devices.mkfsArgs = append(devices.mkfsArgs, val)
2687		case "dm.mountopt":
2688			devices.mountOptions = joinMountOptions(devices.mountOptions, val)
2689		case "dm.metadatadev":
2690			devices.metadataDevice = val
2691		case "dm.datadev":
2692			devices.dataDevice = val
2693		case "dm.thinpooldev":
2694			devices.thinPoolDevice = strings.TrimPrefix(val, "/dev/mapper/")
2695		case "dm.blkdiscard":
2696			foundBlkDiscard = true
2697			devices.doBlkDiscard, err = strconv.ParseBool(val)
2698			if err != nil {
2699				return nil, err
2700			}
2701		case "dm.blocksize":
2702			size, err := units.RAMInBytes(val)
2703			if err != nil {
2704				return nil, err
2705			}
2706			// convert to 512b sectors
2707			devices.thinpBlockSize = uint32(size) >> 9
2708		case "dm.override_udev_sync_check":
2709			devices.overrideUdevSyncCheck, err = strconv.ParseBool(val)
2710			if err != nil {
2711				return nil, err
2712			}
2713
2714		case "dm.use_deferred_removal":
2715			enableDeferredRemoval, err = strconv.ParseBool(val)
2716			if err != nil {
2717				return nil, err
2718			}
2719
2720		case "dm.use_deferred_deletion":
2721			enableDeferredDeletion, err = strconv.ParseBool(val)
2722			if err != nil {
2723				return nil, err
2724			}
2725
2726		case "dm.min_free_space":
2727			if !strings.HasSuffix(val, "%") {
2728				return nil, fmt.Errorf("devmapper: Option dm.min_free_space requires %% suffix")
2729			}
2730
2731			valstring := strings.TrimSuffix(val, "%")
2732			minFreeSpacePercent, err := strconv.ParseUint(valstring, 10, 32)
2733			if err != nil {
2734				return nil, err
2735			}
2736
2737			if minFreeSpacePercent >= 100 {
2738				return nil, fmt.Errorf("devmapper: Invalid value %v for option dm.min_free_space", val)
2739			}
2740
2741			devices.minFreeSpacePercent = uint32(minFreeSpacePercent)
2742		case "dm.xfs_nospace_max_retries":
2743			_, err := strconv.ParseUint(val, 10, 64)
2744			if err != nil {
2745				return nil, err
2746			}
2747			devices.xfsNospaceRetries = val
2748		case "dm.directlvm_device":
2749			lvmSetupConfig.Device = val
2750		case "dm.directlvm_device_force":
2751			lvmSetupConfigForce, err = strconv.ParseBool(val)
2752			if err != nil {
2753				return nil, err
2754			}
2755		case "dm.thinp_percent":
2756			per, err := strconv.ParseUint(strings.TrimSuffix(val, "%"), 10, 32)
2757			if err != nil {
2758				return nil, errors.Wrapf(err, "could not parse `dm.thinp_percent=%s`", val)
2759			}
2760			if per >= 100 {
2761				return nil, errors.New("dm.thinp_percent must be greater than 0 and less than 100")
2762			}
2763			lvmSetupConfig.ThinpPercent = per
2764		case "dm.thinp_metapercent":
2765			per, err := strconv.ParseUint(strings.TrimSuffix(val, "%"), 10, 32)
2766			if err != nil {
2767				return nil, errors.Wrapf(err, "could not parse `dm.thinp_metapercent=%s`", val)
2768			}
2769			if per >= 100 {
2770				return nil, errors.New("dm.thinp_metapercent must be greater than 0 and less than 100")
2771			}
2772			lvmSetupConfig.ThinpMetaPercent = per
2773		case "dm.thinp_autoextend_percent":
2774			per, err := strconv.ParseUint(strings.TrimSuffix(val, "%"), 10, 32)
2775			if err != nil {
2776				return nil, errors.Wrapf(err, "could not parse `dm.thinp_autoextend_percent=%s`", val)
2777			}
2778			if per > 100 {
2779				return nil, errors.New("dm.thinp_autoextend_percent must be greater than 0 and less than 100")
2780			}
2781			lvmSetupConfig.AutoExtendPercent = per
2782		case "dm.thinp_autoextend_threshold":
2783			per, err := strconv.ParseUint(strings.TrimSuffix(val, "%"), 10, 32)
2784			if err != nil {
2785				return nil, errors.Wrapf(err, "could not parse `dm.thinp_autoextend_threshold=%s`", val)
2786			}
2787			if per > 100 {
2788				return nil, errors.New("dm.thinp_autoextend_threshold must be greater than 0 and less than 100")
2789			}
2790			lvmSetupConfig.AutoExtendThreshold = per
2791		case "dm.libdm_log_level":
2792			level, err := strconv.ParseInt(val, 10, 32)
2793			if err != nil {
2794				return nil, errors.Wrapf(err, "could not parse `dm.libdm_log_level=%s`", val)
2795			}
2796			if level < devicemapper.LogLevelFatal || level > devicemapper.LogLevelDebug {
2797				return nil, errors.Errorf("dm.libdm_log_level must be in range [%d,%d]", devicemapper.LogLevelFatal, devicemapper.LogLevelDebug)
2798			}
2799			// Register a new logging callback with the specified level.
2800			devicemapper.LogInit(devicemapper.DefaultLogger{
2801				Level: int(level),
2802			})
2803		default:
2804			return nil, fmt.Errorf("devmapper: Unknown option %s", key)
2805		}
2806	}
2807
2808	if err := validateLVMConfig(lvmSetupConfig); err != nil {
2809		return nil, err
2810	}
2811
2812	devices.lvmSetupConfig = lvmSetupConfig
2813
2814	// By default, don't do blk discard hack on raw devices, its rarely useful and is expensive
2815	if !foundBlkDiscard && (devices.dataDevice != "" || devices.thinPoolDevice != "") {
2816		devices.doBlkDiscard = false
2817	}
2818
2819	if err := devices.initDevmapper(doInit); err != nil {
2820		return nil, err
2821	}
2822
2823	return devices, nil
2824}
2825