1package container // import "github.com/docker/docker/daemon/cluster/executor/container"
2
3import (
4	"errors"
5	"fmt"
6	"net"
7	"strconv"
8	"strings"
9
10	"github.com/sirupsen/logrus"
11
12	"github.com/docker/distribution/reference"
13	"github.com/docker/docker/api/types"
14	enginecontainer "github.com/docker/docker/api/types/container"
15	"github.com/docker/docker/api/types/events"
16	"github.com/docker/docker/api/types/filters"
17	enginemount "github.com/docker/docker/api/types/mount"
18	"github.com/docker/docker/api/types/network"
19	volumetypes "github.com/docker/docker/api/types/volume"
20	"github.com/docker/docker/daemon/cluster/convert"
21	executorpkg "github.com/docker/docker/daemon/cluster/executor"
22	clustertypes "github.com/docker/docker/daemon/cluster/provider"
23	"github.com/docker/go-connections/nat"
24	"github.com/docker/go-units"
25	netconst "github.com/docker/libnetwork/datastore"
26	"github.com/docker/swarmkit/agent/exec"
27	"github.com/docker/swarmkit/api"
28	"github.com/docker/swarmkit/api/genericresource"
29	"github.com/docker/swarmkit/template"
30	gogotypes "github.com/gogo/protobuf/types"
31)
32
// systemLabelPrefix is the reserved namespace for system labels. It is joined
// with a "." to every system label key when container labels are built, and it
// forms the task-id label used by the container event filter.
const systemLabelPrefix = "com.docker.swarm"
37
38// containerConfig converts task properties into docker container compatible
39// components.
40type containerConfig struct {
41	task                *api.Task
42	networksAttachments map[string]*api.NetworkAttachment
43}
44
45// newContainerConfig returns a validated container config. No methods should
46// return an error if this function returns without error.
47func newContainerConfig(t *api.Task, node *api.NodeDescription) (*containerConfig, error) {
48	var c containerConfig
49	return &c, c.setTask(t, node)
50}
51
52func (c *containerConfig) setTask(t *api.Task, node *api.NodeDescription) error {
53	if t.Spec.GetContainer() == nil && t.Spec.GetAttachment() == nil {
54		return exec.ErrRuntimeUnsupported
55	}
56
57	container := t.Spec.GetContainer()
58	if container != nil {
59		if container.Image == "" {
60			return ErrImageRequired
61		}
62
63		if err := validateMounts(container.Mounts); err != nil {
64			return err
65		}
66	}
67
68	// index the networks by name
69	c.networksAttachments = make(map[string]*api.NetworkAttachment, len(t.Networks))
70	for _, attachment := range t.Networks {
71		c.networksAttachments[attachment.Network.Spec.Annotations.Name] = attachment
72	}
73
74	c.task = t
75
76	if t.Spec.GetContainer() != nil {
77		preparedSpec, err := template.ExpandContainerSpec(node, t)
78		if err != nil {
79			return err
80		}
81		c.task.Spec.Runtime = &api.TaskSpec_Container{
82			Container: preparedSpec,
83		}
84	}
85
86	return nil
87}
88
89func (c *containerConfig) networkAttachmentContainerID() string {
90	attachment := c.task.Spec.GetAttachment()
91	if attachment == nil {
92		return ""
93	}
94
95	return attachment.ContainerID
96}
97
98func (c *containerConfig) taskID() string {
99	return c.task.ID
100}
101
102func (c *containerConfig) spec() *api.ContainerSpec {
103	return c.task.Spec.GetContainer()
104}
105
106func (c *containerConfig) nameOrID() string {
107	if c.task.Spec.GetContainer() != nil {
108		return c.name()
109	}
110
111	return c.networkAttachmentContainerID()
112}
113
114func (c *containerConfig) name() string {
115	if c.task.Annotations.Name != "" {
116		// if set, use the container Annotations.Name field, set in the orchestrator.
117		return c.task.Annotations.Name
118	}
119
120	slot := fmt.Sprint(c.task.Slot)
121	if slot == "" || c.task.Slot == 0 {
122		slot = c.task.NodeID
123	}
124
125	// fallback to service.slot.id.
126	return fmt.Sprintf("%s.%s.%s", c.task.ServiceAnnotations.Name, slot, c.task.ID)
127}
128
129func (c *containerConfig) image() string {
130	raw := c.spec().Image
131	ref, err := reference.ParseNormalizedNamed(raw)
132	if err != nil {
133		return raw
134	}
135	return reference.FamiliarString(reference.TagNameOnly(ref))
136}
137
138func (c *containerConfig) portBindings() nat.PortMap {
139	portBindings := nat.PortMap{}
140	if c.task.Endpoint == nil {
141		return portBindings
142	}
143
144	for _, portConfig := range c.task.Endpoint.Ports {
145		if portConfig.PublishMode != api.PublishModeHost {
146			continue
147		}
148
149		port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String())))
150		binding := []nat.PortBinding{
151			{},
152		}
153
154		if portConfig.PublishedPort != 0 {
155			binding[0].HostPort = strconv.Itoa(int(portConfig.PublishedPort))
156		}
157		portBindings[port] = binding
158	}
159
160	return portBindings
161}
162
163func (c *containerConfig) isolation() enginecontainer.Isolation {
164	return convert.IsolationFromGRPC(c.spec().Isolation)
165}
166
167func (c *containerConfig) init() *bool {
168	if c.spec().Init == nil {
169		return nil
170	}
171	init := c.spec().Init.GetValue()
172	return &init
173}
174
175func (c *containerConfig) exposedPorts() map[nat.Port]struct{} {
176	exposedPorts := make(map[nat.Port]struct{})
177	if c.task.Endpoint == nil {
178		return exposedPorts
179	}
180
181	for _, portConfig := range c.task.Endpoint.Ports {
182		if portConfig.PublishMode != api.PublishModeHost {
183			continue
184		}
185
186		port := nat.Port(fmt.Sprintf("%d/%s", portConfig.TargetPort, strings.ToLower(portConfig.Protocol.String())))
187		exposedPorts[port] = struct{}{}
188	}
189
190	return exposedPorts
191}
192
193func (c *containerConfig) config() *enginecontainer.Config {
194	genericEnvs := genericresource.EnvFormat(c.task.AssignedGenericResources, "DOCKER_RESOURCE")
195	env := append(c.spec().Env, genericEnvs...)
196
197	config := &enginecontainer.Config{
198		Labels:       c.labels(),
199		StopSignal:   c.spec().StopSignal,
200		Tty:          c.spec().TTY,
201		OpenStdin:    c.spec().OpenStdin,
202		User:         c.spec().User,
203		Env:          env,
204		Hostname:     c.spec().Hostname,
205		WorkingDir:   c.spec().Dir,
206		Image:        c.image(),
207		ExposedPorts: c.exposedPorts(),
208		Healthcheck:  c.healthcheck(),
209	}
210
211	if len(c.spec().Command) > 0 {
212		// If Command is provided, we replace the whole invocation with Command
213		// by replacing Entrypoint and specifying Cmd. Args is ignored in this
214		// case.
215		config.Entrypoint = append(config.Entrypoint, c.spec().Command...)
216		config.Cmd = append(config.Cmd, c.spec().Args...)
217	} else if len(c.spec().Args) > 0 {
218		// In this case, we assume the image has an Entrypoint and Args
219		// specifies the arguments for that entrypoint.
220		config.Cmd = c.spec().Args
221	}
222
223	return config
224}
225
226func (c *containerConfig) labels() map[string]string {
227	var (
228		system = map[string]string{
229			"task":         "", // mark as cluster task
230			"task.id":      c.task.ID,
231			"task.name":    c.name(),
232			"node.id":      c.task.NodeID,
233			"service.id":   c.task.ServiceID,
234			"service.name": c.task.ServiceAnnotations.Name,
235		}
236		labels = make(map[string]string)
237	)
238
239	// base labels are those defined in the spec.
240	for k, v := range c.spec().Labels {
241		labels[k] = v
242	}
243
244	// we then apply the overrides from the task, which may be set via the
245	// orchestrator.
246	for k, v := range c.task.Annotations.Labels {
247		labels[k] = v
248	}
249
250	// finally, we apply the system labels, which override all labels.
251	for k, v := range system {
252		labels[strings.Join([]string{systemLabelPrefix, k}, ".")] = v
253	}
254
255	return labels
256}
257
258func (c *containerConfig) mounts() []enginemount.Mount {
259	var r []enginemount.Mount
260	for _, mount := range c.spec().Mounts {
261		r = append(r, convertMount(mount))
262	}
263	return r
264}
265
266func convertMount(m api.Mount) enginemount.Mount {
267	mount := enginemount.Mount{
268		Source:   m.Source,
269		Target:   m.Target,
270		ReadOnly: m.ReadOnly,
271	}
272
273	switch m.Type {
274	case api.MountTypeBind:
275		mount.Type = enginemount.TypeBind
276	case api.MountTypeVolume:
277		mount.Type = enginemount.TypeVolume
278	case api.MountTypeTmpfs:
279		mount.Type = enginemount.TypeTmpfs
280	case api.MountTypeNamedPipe:
281		mount.Type = enginemount.TypeNamedPipe
282	}
283
284	if m.BindOptions != nil {
285		mount.BindOptions = &enginemount.BindOptions{
286			NonRecursive: m.BindOptions.NonRecursive,
287		}
288		switch m.BindOptions.Propagation {
289		case api.MountPropagationRPrivate:
290			mount.BindOptions.Propagation = enginemount.PropagationRPrivate
291		case api.MountPropagationPrivate:
292			mount.BindOptions.Propagation = enginemount.PropagationPrivate
293		case api.MountPropagationRSlave:
294			mount.BindOptions.Propagation = enginemount.PropagationRSlave
295		case api.MountPropagationSlave:
296			mount.BindOptions.Propagation = enginemount.PropagationSlave
297		case api.MountPropagationRShared:
298			mount.BindOptions.Propagation = enginemount.PropagationRShared
299		case api.MountPropagationShared:
300			mount.BindOptions.Propagation = enginemount.PropagationShared
301		}
302	}
303
304	if m.VolumeOptions != nil {
305		mount.VolumeOptions = &enginemount.VolumeOptions{
306			NoCopy: m.VolumeOptions.NoCopy,
307		}
308		if m.VolumeOptions.Labels != nil {
309			mount.VolumeOptions.Labels = make(map[string]string, len(m.VolumeOptions.Labels))
310			for k, v := range m.VolumeOptions.Labels {
311				mount.VolumeOptions.Labels[k] = v
312			}
313		}
314		if m.VolumeOptions.DriverConfig != nil {
315			mount.VolumeOptions.DriverConfig = &enginemount.Driver{
316				Name: m.VolumeOptions.DriverConfig.Name,
317			}
318			if m.VolumeOptions.DriverConfig.Options != nil {
319				mount.VolumeOptions.DriverConfig.Options = make(map[string]string, len(m.VolumeOptions.DriverConfig.Options))
320				for k, v := range m.VolumeOptions.DriverConfig.Options {
321					mount.VolumeOptions.DriverConfig.Options[k] = v
322				}
323			}
324		}
325	}
326
327	if m.TmpfsOptions != nil {
328		mount.TmpfsOptions = &enginemount.TmpfsOptions{
329			SizeBytes: m.TmpfsOptions.SizeBytes,
330			Mode:      m.TmpfsOptions.Mode,
331		}
332	}
333
334	return mount
335}
336
337func (c *containerConfig) healthcheck() *enginecontainer.HealthConfig {
338	hcSpec := c.spec().Healthcheck
339	if hcSpec == nil {
340		return nil
341	}
342	interval, _ := gogotypes.DurationFromProto(hcSpec.Interval)
343	timeout, _ := gogotypes.DurationFromProto(hcSpec.Timeout)
344	startPeriod, _ := gogotypes.DurationFromProto(hcSpec.StartPeriod)
345	return &enginecontainer.HealthConfig{
346		Test:        hcSpec.Test,
347		Interval:    interval,
348		Timeout:     timeout,
349		Retries:     int(hcSpec.Retries),
350		StartPeriod: startPeriod,
351	}
352}
353
354func (c *containerConfig) hostConfig() *enginecontainer.HostConfig {
355	hc := &enginecontainer.HostConfig{
356		Resources:      c.resources(),
357		GroupAdd:       c.spec().Groups,
358		PortBindings:   c.portBindings(),
359		Mounts:         c.mounts(),
360		ReadonlyRootfs: c.spec().ReadOnly,
361		Isolation:      c.isolation(),
362		Init:           c.init(),
363		Sysctls:        c.spec().Sysctls,
364		CapAdd:         c.spec().CapabilityAdd,
365		CapDrop:        c.spec().CapabilityDrop,
366	}
367
368	if c.spec().DNSConfig != nil {
369		hc.DNS = c.spec().DNSConfig.Nameservers
370		hc.DNSSearch = c.spec().DNSConfig.Search
371		hc.DNSOptions = c.spec().DNSConfig.Options
372	}
373
374	c.applyPrivileges(hc)
375
376	// The format of extra hosts on swarmkit is specified in:
377	// http://man7.org/linux/man-pages/man5/hosts.5.html
378	//    IP_address canonical_hostname [aliases...]
379	// However, the format of ExtraHosts in HostConfig is
380	//    <host>:<ip>
381	// We need to do the conversion here
382	// (Alias is ignored for now)
383	for _, entry := range c.spec().Hosts {
384		parts := strings.Fields(entry)
385		if len(parts) > 1 {
386			hc.ExtraHosts = append(hc.ExtraHosts, fmt.Sprintf("%s:%s", parts[1], parts[0]))
387		}
388	}
389
390	if c.task.LogDriver != nil {
391		hc.LogConfig = enginecontainer.LogConfig{
392			Type:   c.task.LogDriver.Name,
393			Config: c.task.LogDriver.Options,
394		}
395	}
396
397	if len(c.task.Networks) > 0 {
398		labels := c.task.Networks[0].Network.Spec.Annotations.Labels
399		name := c.task.Networks[0].Network.Spec.Annotations.Name
400		if v, ok := labels["com.docker.swarm.predefined"]; ok && v == "true" {
401			hc.NetworkMode = enginecontainer.NetworkMode(name)
402		}
403	}
404
405	return hc
406}
407
408// This handles the case of volumes that are defined inside a service Mount
409func (c *containerConfig) volumeCreateRequest(mount *api.Mount) *volumetypes.VolumeCreateBody {
410	var (
411		driverName string
412		driverOpts map[string]string
413		labels     map[string]string
414	)
415
416	if mount.VolumeOptions != nil && mount.VolumeOptions.DriverConfig != nil {
417		driverName = mount.VolumeOptions.DriverConfig.Name
418		driverOpts = mount.VolumeOptions.DriverConfig.Options
419		labels = mount.VolumeOptions.Labels
420	}
421
422	if mount.VolumeOptions != nil {
423		return &volumetypes.VolumeCreateBody{
424			Name:       mount.Source,
425			Driver:     driverName,
426			DriverOpts: driverOpts,
427			Labels:     labels,
428		}
429	}
430	return nil
431}
432
433func (c *containerConfig) resources() enginecontainer.Resources {
434	resources := enginecontainer.Resources{}
435
436	// set pids limit
437	pidsLimit := c.spec().PidsLimit
438	if pidsLimit > 0 {
439		resources.PidsLimit = &pidsLimit
440	}
441
442	resources.Ulimits = make([]*units.Ulimit, len(c.spec().Ulimits))
443	for i, ulimit := range c.spec().Ulimits {
444		resources.Ulimits[i] = &units.Ulimit{
445			Name: ulimit.Name,
446			Soft: ulimit.Soft,
447			Hard: ulimit.Hard,
448		}
449	}
450
451	// If no limits are specified let the engine use its defaults.
452	//
453	// TODO(aluzzardi): We might want to set some limits anyway otherwise
454	// "unlimited" tasks will step over the reservation of other tasks.
455	r := c.task.Spec.Resources
456	if r == nil || r.Limits == nil {
457		return resources
458	}
459
460	if r.Limits.MemoryBytes > 0 {
461		resources.Memory = r.Limits.MemoryBytes
462	}
463
464	if r.Limits.NanoCPUs > 0 {
465		resources.NanoCPUs = r.Limits.NanoCPUs
466	}
467
468	return resources
469}
470
471// Docker daemon supports just 1 network during container create.
472func (c *containerConfig) createNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig {
473	var networks []*api.NetworkAttachment
474	if c.task.Spec.GetContainer() != nil || c.task.Spec.GetAttachment() != nil {
475		networks = c.task.Networks
476	}
477
478	epConfig := make(map[string]*network.EndpointSettings)
479	if len(networks) > 0 {
480		epConfig[networks[0].Network.Spec.Annotations.Name] = getEndpointConfig(networks[0], b)
481	}
482
483	return &network.NetworkingConfig{EndpointsConfig: epConfig}
484}
485
486// TODO: Merge this function with createNetworkingConfig after daemon supports multiple networks in container create
487func (c *containerConfig) connectNetworkingConfig(b executorpkg.Backend) *network.NetworkingConfig {
488	var networks []*api.NetworkAttachment
489	if c.task.Spec.GetContainer() != nil {
490		networks = c.task.Networks
491	}
492	// First network is used during container create. Other networks are used in "docker network connect"
493	if len(networks) < 2 {
494		return nil
495	}
496
497	epConfig := make(map[string]*network.EndpointSettings)
498	for _, na := range networks[1:] {
499		epConfig[na.Network.Spec.Annotations.Name] = getEndpointConfig(na, b)
500	}
501	return &network.NetworkingConfig{EndpointsConfig: epConfig}
502}
503
504func getEndpointConfig(na *api.NetworkAttachment, b executorpkg.Backend) *network.EndpointSettings {
505	var ipv4, ipv6 string
506	for _, addr := range na.Addresses {
507		ip, _, err := net.ParseCIDR(addr)
508		if err != nil {
509			continue
510		}
511
512		if ip.To4() != nil {
513			ipv4 = ip.String()
514			continue
515		}
516
517		if ip.To16() != nil {
518			ipv6 = ip.String()
519		}
520	}
521
522	n := &network.EndpointSettings{
523		NetworkID: na.Network.ID,
524		IPAMConfig: &network.EndpointIPAMConfig{
525			IPv4Address: ipv4,
526			IPv6Address: ipv6,
527		},
528		DriverOpts: na.DriverAttachmentOpts,
529	}
530	if v, ok := na.Network.Spec.Annotations.Labels["com.docker.swarm.predefined"]; ok && v == "true" {
531		if ln, err := b.FindNetwork(na.Network.Spec.Annotations.Name); err == nil {
532			n.NetworkID = ln.ID()
533		}
534	}
535	return n
536}
537
538func (c *containerConfig) virtualIP(networkID string) string {
539	if c.task.Endpoint == nil {
540		return ""
541	}
542
543	for _, eVip := range c.task.Endpoint.VirtualIPs {
544		// We only support IPv4 VIPs for now.
545		if eVip.NetworkID == networkID {
546			vip, _, err := net.ParseCIDR(eVip.Addr)
547			if err != nil {
548				return ""
549			}
550
551			return vip.String()
552		}
553	}
554
555	return ""
556}
557
558func (c *containerConfig) serviceConfig() *clustertypes.ServiceConfig {
559	if len(c.task.Networks) == 0 {
560		return nil
561	}
562
563	logrus.Debugf("Creating service config in agent for t = %+v", c.task)
564	svcCfg := &clustertypes.ServiceConfig{
565		Name:             c.task.ServiceAnnotations.Name,
566		Aliases:          make(map[string][]string),
567		ID:               c.task.ServiceID,
568		VirtualAddresses: make(map[string]*clustertypes.VirtualAddress),
569	}
570
571	for _, na := range c.task.Networks {
572		svcCfg.VirtualAddresses[na.Network.ID] = &clustertypes.VirtualAddress{
573			// We support only IPv4 virtual IP for now.
574			IPv4: c.virtualIP(na.Network.ID),
575		}
576		if len(na.Aliases) > 0 {
577			svcCfg.Aliases[na.Network.ID] = na.Aliases
578		}
579	}
580
581	if c.task.Endpoint != nil {
582		for _, ePort := range c.task.Endpoint.Ports {
583			if ePort.PublishMode != api.PublishModeIngress {
584				continue
585			}
586
587			svcCfg.ExposedPorts = append(svcCfg.ExposedPorts, &clustertypes.PortConfig{
588				Name:          ePort.Name,
589				Protocol:      int32(ePort.Protocol),
590				TargetPort:    ePort.TargetPort,
591				PublishedPort: ePort.PublishedPort,
592			})
593		}
594	}
595
596	return svcCfg
597}
598
599func (c *containerConfig) networkCreateRequest(name string) (clustertypes.NetworkCreateRequest, error) {
600	na, ok := c.networksAttachments[name]
601	if !ok {
602		return clustertypes.NetworkCreateRequest{}, errors.New("container: unknown network referenced")
603	}
604
605	options := types.NetworkCreate{
606		// ID:     na.Network.ID,
607		Labels:         na.Network.Spec.Annotations.Labels,
608		Internal:       na.Network.Spec.Internal,
609		Attachable:     na.Network.Spec.Attachable,
610		Ingress:        convert.IsIngressNetwork(na.Network),
611		EnableIPv6:     na.Network.Spec.Ipv6Enabled,
612		CheckDuplicate: true,
613		Scope:          netconst.SwarmScope,
614	}
615
616	if na.Network.Spec.GetNetwork() != "" {
617		options.ConfigFrom = &network.ConfigReference{
618			Network: na.Network.Spec.GetNetwork(),
619		}
620	}
621
622	if na.Network.DriverState != nil {
623		options.Driver = na.Network.DriverState.Name
624		options.Options = na.Network.DriverState.Options
625	}
626	if na.Network.IPAM != nil {
627		options.IPAM = &network.IPAM{
628			Driver:  na.Network.IPAM.Driver.Name,
629			Options: na.Network.IPAM.Driver.Options,
630		}
631		for _, ic := range na.Network.IPAM.Configs {
632			c := network.IPAMConfig{
633				Subnet:  ic.Subnet,
634				IPRange: ic.Range,
635				Gateway: ic.Gateway,
636			}
637			options.IPAM.Config = append(options.IPAM.Config, c)
638		}
639	}
640
641	return clustertypes.NetworkCreateRequest{
642		ID: na.Network.ID,
643		NetworkCreateRequest: types.NetworkCreateRequest{
644			Name:          name,
645			NetworkCreate: options,
646		},
647	}, nil
648}
649
650func (c *containerConfig) applyPrivileges(hc *enginecontainer.HostConfig) {
651	privileges := c.spec().Privileges
652	if privileges == nil {
653		return
654	}
655
656	credentials := privileges.CredentialSpec
657	if credentials != nil {
658		switch credentials.Source.(type) {
659		case *api.Privileges_CredentialSpec_File:
660			hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=file://"+credentials.GetFile())
661		case *api.Privileges_CredentialSpec_Registry:
662			hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=registry://"+credentials.GetRegistry())
663		case *api.Privileges_CredentialSpec_Config:
664			hc.SecurityOpt = append(hc.SecurityOpt, "credentialspec=config://"+credentials.GetConfig())
665		}
666	}
667
668	selinux := privileges.SELinuxContext
669	if selinux != nil {
670		if selinux.Disable {
671			hc.SecurityOpt = append(hc.SecurityOpt, "label=disable")
672		}
673		if selinux.User != "" {
674			hc.SecurityOpt = append(hc.SecurityOpt, "label=user:"+selinux.User)
675		}
676		if selinux.Role != "" {
677			hc.SecurityOpt = append(hc.SecurityOpt, "label=role:"+selinux.Role)
678		}
679		if selinux.Level != "" {
680			hc.SecurityOpt = append(hc.SecurityOpt, "label=level:"+selinux.Level)
681		}
682		if selinux.Type != "" {
683			hc.SecurityOpt = append(hc.SecurityOpt, "label=type:"+selinux.Type)
684		}
685	}
686}
687
688func (c containerConfig) eventFilter() filters.Args {
689	filter := filters.NewArgs()
690	filter.Add("type", events.ContainerEventType)
691	filter.Add("name", c.name())
692	filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID))
693	return filter
694}
695