1// +build linux
2
3/*
4   Copyright The containerd Authors.
5
6   Licensed under the Apache License, Version 2.0 (the "License");
7   you may not use this file except in compliance with the License.
8   You may obtain a copy of the License at
9
10       http://www.apache.org/licenses/LICENSE-2.0
11
12   Unless required by applicable law or agreed to in writing, software
13   distributed under the License is distributed on an "AS IS" BASIS,
14   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   See the License for the specific language governing permissions and
16   limitations under the License.
17*/
18
19package devmapper
20
21import (
22	"context"
23	"path/filepath"
24	"strconv"
25	"time"
26
27	"github.com/hashicorp/go-multierror"
28	"github.com/pkg/errors"
29	"golang.org/x/sys/unix"
30
31	"github.com/containerd/containerd/log"
32	"github.com/containerd/containerd/snapshots/devmapper/dmsetup"
33)
34
35// PoolDevice ties together data and metadata volumes, represents thin-pool and manages volumes, snapshots and device ids.
36type PoolDevice struct {
37	poolName string
38	metadata *PoolMetadata
39}
40
41// NewPoolDevice creates new thin-pool from existing data and metadata volumes.
42// If pool 'poolName' already exists, it'll be reloaded with new parameters.
43func NewPoolDevice(ctx context.Context, config *Config) (*PoolDevice, error) {
44	log.G(ctx).Infof("initializing pool device %q", config.PoolName)
45
46	version, err := dmsetup.Version()
47	if err != nil {
48		log.G(ctx).Errorf("dmsetup not available")
49		return nil, err
50	}
51
52	log.G(ctx).Infof("using dmsetup:\n%s", version)
53
54	dbpath := filepath.Join(config.RootPath, config.PoolName+".db")
55	poolMetaStore, err := NewPoolMetadata(dbpath)
56	if err != nil {
57		return nil, err
58	}
59
60	// Make sure pool exists and available
61	poolPath := dmsetup.GetFullDevicePath(config.PoolName)
62	if _, err := dmsetup.Info(poolPath); err != nil {
63		return nil, errors.Wrapf(err, "failed to query pool %q", poolPath)
64	}
65
66	poolDevice := &PoolDevice{
67		poolName: config.PoolName,
68		metadata: poolMetaStore,
69	}
70
71	if err := poolDevice.ensureDeviceStates(ctx); err != nil {
72		return nil, errors.Wrap(err, "failed to check devices state")
73	}
74
75	return poolDevice, nil
76}
77
78// ensureDeviceStates updates devices to their real state:
79//   - marks devices with incomplete states (after crash) as 'Faulty'
80//   - activates devices if they are marked as 'Activated' but the dm
81//     device is not active, which can happen to a stopped container
82//     after a reboot
83func (p *PoolDevice) ensureDeviceStates(ctx context.Context) error {
84	var faultyDevices []*DeviceInfo
85	var activatedDevices []*DeviceInfo
86
87	if err := p.WalkDevices(ctx, func(info *DeviceInfo) error {
88		switch info.State {
89		case Suspended, Resumed, Deactivated, Removed, Faulty:
90		case Activated:
91			activatedDevices = append(activatedDevices, info)
92		default:
93			faultyDevices = append(faultyDevices, info)
94		}
95		return nil
96	}); err != nil {
97		return errors.Wrap(err, "failed to query devices from metastore")
98	}
99
100	var result *multierror.Error
101	for _, dev := range activatedDevices {
102		if p.IsActivated(dev.Name) {
103			continue
104		}
105
106		log.G(ctx).Warnf("devmapper device %q marked as %q but not active, activating it", dev.Name, dev.State)
107		if err := p.activateDevice(ctx, dev); err != nil {
108			result = multierror.Append(result, err)
109		}
110	}
111
112	for _, dev := range faultyDevices {
113		log.G(ctx).
114			WithField("dev_id", dev.DeviceID).
115			WithField("parent", dev.ParentName).
116			WithField("error", dev.Error).
117			Warnf("devmapper device %q has invalid state %q, marking as faulty", dev.Name, dev.State)
118
119		if err := p.metadata.MarkFaulty(ctx, dev.Name); err != nil {
120			result = multierror.Append(result, err)
121		}
122	}
123
124	return multierror.Prefix(result.ErrorOrNil(), "devmapper:")
125}
126
127// transition invokes 'updateStateFn' callback to perform devmapper operation and reflects device state changes/errors in meta store.
128// 'tryingState' will be set before invoking callback. If callback succeeded 'successState' will be set, otherwise
129// error details will be recorded in meta store.
130func (p *PoolDevice) transition(ctx context.Context, deviceName string, tryingState DeviceState, successState DeviceState, updateStateFn func() error) error {
131	// Set device to trying state
132	uerr := p.metadata.UpdateDevice(ctx, deviceName, func(deviceInfo *DeviceInfo) error {
133		deviceInfo.State = tryingState
134		return nil
135	})
136
137	if uerr != nil {
138		return errors.Wrapf(uerr, "failed to set device %q state to %q", deviceName, tryingState)
139	}
140
141	var result *multierror.Error
142
143	// Invoke devmapper operation
144	err := updateStateFn()
145
146	if err != nil {
147		result = multierror.Append(result, err)
148	}
149
150	// If operation succeeded transition to success state, otherwise save error details
151	uerr = p.metadata.UpdateDevice(ctx, deviceName, func(deviceInfo *DeviceInfo) error {
152		if err == nil {
153			deviceInfo.State = successState
154			deviceInfo.Error = ""
155		} else {
156			deviceInfo.Error = err.Error()
157		}
158		return nil
159	})
160
161	if uerr != nil {
162		result = multierror.Append(result, uerr)
163	}
164
165	return unwrapError(result)
166}
167
168// unwrapError converts multierror.Error to the original error when it is possible.
169// multierror 1.1.0 has the similar function named Unwrap, but it requires Go 1.14.
170func unwrapError(e *multierror.Error) error {
171	if e == nil {
172		return nil
173	}
174
175	// If the error can be expressed without multierror, return the original error.
176	if len(e.Errors) == 1 {
177		return e.Errors[0]
178	}
179
180	return e.ErrorOrNil()
181}
182
183// CreateThinDevice creates new devmapper thin-device with given name and size.
184// Device ID for thin-device will be allocated from metadata store.
185// If allocation successful, device will be activated with /dev/mapper/<deviceName>
186func (p *PoolDevice) CreateThinDevice(ctx context.Context, deviceName string, virtualSizeBytes uint64) (retErr error) {
187	info := &DeviceInfo{
188		Name:  deviceName,
189		Size:  virtualSizeBytes,
190		State: Unknown,
191	}
192
193	var (
194		metaErr   error
195		devErr    error
196		activeErr error
197	)
198
199	defer func() {
200		// We've created a devmapper device, but failed to activate it, try rollback everything
201		if activeErr != nil {
202			retErr = p.rollbackActivate(ctx, info, activeErr)
203			return
204		}
205
206		// We're unable to create the devmapper device, most likely something wrong with the deviceID
207		if devErr != nil {
208			retErr = multierror.Append(retErr, p.metadata.MarkFaulty(ctx, info.Name))
209			return
210		}
211	}()
212
213	// Save initial device metadata and allocate new device ID from store
214	metaErr = p.metadata.AddDevice(ctx, info)
215	if metaErr != nil {
216		return metaErr
217	}
218
219	// Create thin device
220	devErr = p.createDevice(ctx, info)
221	if devErr != nil {
222		return devErr
223	}
224
225	// Activate thin device
226	activeErr = p.activateDevice(ctx, info)
227	if activeErr != nil {
228		return activeErr
229	}
230
231	return nil
232}
233
234func (p *PoolDevice) rollbackActivate(ctx context.Context, info *DeviceInfo, activateErr error) error {
235	// Delete the device first.
236	delErr := p.deleteDevice(ctx, info)
237	if delErr != nil {
238		// Failed to rollback, mark the device as faulty and keep metadata in order to
239		// preserve the faulty device ID
240		return multierror.Append(activateErr, delErr, p.metadata.MarkFaulty(ctx, info.Name))
241	}
242
243	// The devmapper device has been successfully deleted, deallocate device ID
244	if err := p.RemoveDevice(ctx, info.Name); err != nil {
245		return multierror.Append(activateErr, err)
246	}
247
248	return activateErr
249}
250
251// createDevice creates thin device
252func (p *PoolDevice) createDevice(ctx context.Context, info *DeviceInfo) error {
253	if err := p.transition(ctx, info.Name, Creating, Created, func() error {
254		return dmsetup.CreateDevice(p.poolName, info.DeviceID)
255	}); err != nil {
256		return errors.Wrapf(err, "failed to create new thin device %q (dev: %d)", info.Name, info.DeviceID)
257	}
258
259	return nil
260}
261
262// activateDevice activates thin device
263func (p *PoolDevice) activateDevice(ctx context.Context, info *DeviceInfo) error {
264	if err := p.transition(ctx, info.Name, Activating, Activated, func() error {
265		return dmsetup.ActivateDevice(p.poolName, info.Name, info.DeviceID, info.Size, "")
266	}); err != nil {
267		return errors.Wrapf(err, "failed to activate new thin device %q (dev: %d)", info.Name, info.DeviceID)
268	}
269
270	return nil
271}
272
273// CreateSnapshotDevice creates and activates new thin-device from parent thin-device (makes snapshot)
274func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string, snapshotName string, virtualSizeBytes uint64) (retErr error) {
275	baseInfo, err := p.metadata.GetDevice(ctx, deviceName)
276	if err != nil {
277		return errors.Wrapf(err, "failed to query device metadata for %q", deviceName)
278	}
279
280	snapInfo := &DeviceInfo{
281		Name:       snapshotName,
282		Size:       virtualSizeBytes,
283		ParentName: deviceName,
284		State:      Unknown,
285	}
286
287	var (
288		metaErr   error
289		devErr    error
290		activeErr error
291	)
292
293	defer func() {
294		// We've created a devmapper device, but failed to activate it, try rollback everything
295		if activeErr != nil {
296			retErr = p.rollbackActivate(ctx, snapInfo, activeErr)
297			return
298		}
299
300		// We're unable to create the devmapper device, most likely something wrong with the deviceID
301		if devErr != nil {
302			retErr = multierror.Append(retErr, p.metadata.MarkFaulty(ctx, snapInfo.Name))
303			return
304		}
305	}()
306
307	// Save snapshot metadata and allocate new device ID
308	metaErr = p.metadata.AddDevice(ctx, snapInfo)
309	if metaErr != nil {
310		return metaErr
311	}
312
313	// The base device must be suspend before taking a snapshot to
314	// avoid corruption.
315	// https://github.com/torvalds/linux/blob/v5.7/Documentation/admin-guide/device-mapper/thin-provisioning.rst#internal-snapshots
316	if p.IsLoaded(deviceName) {
317		log.G(ctx).Debugf("suspending %q before taking its snapshot", deviceName)
318		suspendErr := p.SuspendDevice(ctx, deviceName)
319		if suspendErr != nil {
320			return suspendErr
321		}
322		defer func() {
323			err := p.ResumeDevice(ctx, deviceName)
324			if err != nil {
325				log.G(ctx).WithError(err).Errorf("failed to resume base device %q after taking its snapshot", baseInfo.Name)
326			}
327		}()
328	}
329
330	// Create thin device snapshot
331	devErr = p.createSnapshot(ctx, baseInfo, snapInfo)
332	if devErr != nil {
333		return devErr
334	}
335
336	// Activate the snapshot device
337	activeErr = p.activateDevice(ctx, snapInfo)
338	if activeErr != nil {
339		return activeErr
340	}
341
342	return nil
343}
344
345func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *DeviceInfo) error {
346	if err := p.transition(ctx, snapInfo.Name, Creating, Created, func() error {
347		return dmsetup.CreateSnapshot(p.poolName, snapInfo.DeviceID, baseInfo.DeviceID)
348	}); err != nil {
349		return errors.Wrapf(err,
350			"failed to create snapshot %q (dev: %d) from %q (dev: %d)",
351			snapInfo.Name,
352			snapInfo.DeviceID,
353			baseInfo.Name,
354			baseInfo.DeviceID)
355	}
356
357	return nil
358}
359
360// SuspendDevice flushes the outstanding IO and blocks the further IO
361func (p *PoolDevice) SuspendDevice(ctx context.Context, deviceName string) error {
362	if err := p.transition(ctx, deviceName, Suspending, Suspended, func() error {
363		return dmsetup.SuspendDevice(deviceName)
364	}); err != nil {
365		return errors.Wrapf(err, "failed to suspend device %q", deviceName)
366	}
367
368	return nil
369}
370
371func (p *PoolDevice) ResumeDevice(ctx context.Context, deviceName string) error {
372	if err := p.transition(ctx, deviceName, Resuming, Resumed, func() error {
373		return dmsetup.ResumeDevice(deviceName)
374	}); err != nil {
375		return errors.Wrapf(err, "failed to resume device %q", deviceName)
376	}
377
378	return nil
379}
380
381// DeactivateDevice deactivates thin device
382func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, deferred, withForce bool) error {
383	if !p.IsLoaded(deviceName) {
384		return nil
385	}
386
387	opts := []dmsetup.RemoveDeviceOpt{dmsetup.RemoveWithRetries}
388	if deferred {
389		opts = append(opts, dmsetup.RemoveDeferred)
390	}
391	if withForce {
392		opts = append(opts, dmsetup.RemoveWithForce)
393	}
394
395	if err := p.transition(ctx, deviceName, Deactivating, Deactivated, func() error {
396		var (
397			maxRetries = 100
398			retryDelay = 100 * time.Millisecond
399			retryErr   error
400		)
401
402		for attempt := 1; attempt <= maxRetries; attempt++ {
403			retryErr = dmsetup.RemoveDevice(deviceName, opts...)
404			if retryErr == nil {
405				return nil
406			} else if retryErr != unix.EBUSY {
407				return retryErr
408			}
409
410			// Don't spam logs
411			if attempt%10 == 0 {
412				log.G(ctx).WithError(retryErr).Warnf("failed to deactivate device, retrying... (%d of %d)", attempt, maxRetries)
413			}
414
415			// Devmapper device is busy, give it a bit of time and retry removal
416			time.Sleep(retryDelay)
417		}
418
419		return retryErr
420	}); err != nil {
421		return errors.Wrapf(err, "failed to deactivate device %q", deviceName)
422	}
423
424	return nil
425}
426
427// IsActivated returns true if thin-device is activated
428func (p *PoolDevice) IsActivated(deviceName string) bool {
429	infos, err := dmsetup.Info(deviceName)
430	if err != nil || len(infos) != 1 {
431		// Couldn't query device info, device not active
432		return false
433	}
434
435	if devInfo := infos[0]; devInfo.TableLive {
436		return true
437	}
438
439	return false
440}
441
442// IsLoaded returns true if thin-device is visible for dmsetup
443func (p *PoolDevice) IsLoaded(deviceName string) bool {
444	_, err := dmsetup.Info(deviceName)
445	return err == nil
446}
447
448// GetUsage reports total size in bytes consumed by a thin-device.
449// It relies on the number of used blocks reported by 'dmsetup status'.
450// The output looks like:
451//  device2: 0 204800 thin 17280 204799
452// Where 17280 is the number of used sectors
453func (p *PoolDevice) GetUsage(deviceName string) (int64, error) {
454	status, err := dmsetup.Status(deviceName)
455	if err != nil {
456		return 0, errors.Wrapf(err, "can't get status for device %q", deviceName)
457	}
458
459	if len(status.Params) == 0 {
460		return 0, errors.Errorf("failed to get the number of used blocks, unexpected output from dmsetup status")
461	}
462
463	count, err := strconv.ParseInt(status.Params[0], 10, 64)
464	if err != nil {
465		return 0, errors.Wrapf(err, "failed to parse status params: %q", status.Params[0])
466	}
467
468	return count * dmsetup.SectorSize, nil
469}
470
471// RemoveDevice completely wipes out thin device from thin-pool and frees it's device ID
472func (p *PoolDevice) RemoveDevice(ctx context.Context, deviceName string) error {
473	info, err := p.metadata.GetDevice(ctx, deviceName)
474	if err != nil {
475		return errors.Wrapf(err, "can't query metadata for device %q", deviceName)
476	}
477
478	if err := p.DeactivateDevice(ctx, deviceName, false, true); err != nil {
479		return err
480	}
481
482	if err := p.deleteDevice(ctx, info); err != nil {
483		return err
484	}
485
486	// Remove record from meta store and free device ID
487	if err := p.metadata.RemoveDevice(ctx, deviceName); err != nil {
488		return errors.Wrapf(err, "can't remove device %q metadata from store after removal", deviceName)
489	}
490
491	return nil
492}
493
494func (p *PoolDevice) deleteDevice(ctx context.Context, info *DeviceInfo) error {
495	if err := p.transition(ctx, info.Name, Removing, Removed, func() error {
496		// Send 'delete' message to thin-pool
497		e := dmsetup.DeleteDevice(p.poolName, info.DeviceID)
498
499		// Ignores the error if the device has been deleted already.
500		if e != nil && !errors.Is(e, unix.ENODATA) {
501			return e
502		}
503		return nil
504	}); err != nil {
505		return errors.Wrapf(err, "failed to delete device %q (dev id: %d)", info.Name, info.DeviceID)
506	}
507
508	return nil
509}
510
511// RemovePool deactivates all child thin-devices and removes thin-pool device
512func (p *PoolDevice) RemovePool(ctx context.Context) error {
513	deviceNames, err := p.metadata.GetDeviceNames(ctx)
514	if err != nil {
515		return errors.Wrap(err, "can't query device names")
516	}
517
518	var result *multierror.Error
519
520	// Deactivate devices if any
521	for _, name := range deviceNames {
522		if err := p.DeactivateDevice(ctx, name, true, true); err != nil {
523			result = multierror.Append(result, errors.Wrapf(err, "failed to remove %q", name))
524		}
525	}
526
527	if err := dmsetup.RemoveDevice(p.poolName, dmsetup.RemoveWithForce, dmsetup.RemoveWithRetries, dmsetup.RemoveDeferred); err != nil {
528		result = multierror.Append(result, errors.Wrapf(err, "failed to remove pool %q", p.poolName))
529	}
530
531	return result.ErrorOrNil()
532}
533
534// MarkDeviceState changes the device's state in metastore
535func (p *PoolDevice) MarkDeviceState(ctx context.Context, name string, state DeviceState) error {
536	return p.metadata.ChangeDeviceState(ctx, name, state)
537}
538
539// WalkDevices iterates all devices in pool metadata
540func (p *PoolDevice) WalkDevices(ctx context.Context, cb func(info *DeviceInfo) error) error {
541	return p.metadata.WalkDevices(ctx, func(info *DeviceInfo) error {
542		return cb(info)
543	})
544}
545
546// Close closes pool device (thin-pool will not be removed)
547func (p *PoolDevice) Close() error {
548	return p.metadata.Close()
549}
550