1package netlink
2
3import (
4	"fmt"
5	"net"
6	"strings"
7	"syscall"
8
9	"github.com/vishvananda/netlink/nl"
10	"github.com/vishvananda/netns"
11	"golang.org/x/sys/unix"
12)
13
14// RtAttr is shared so it is in netlink_linux.go
15
16const (
17	SCOPE_UNIVERSE Scope = unix.RT_SCOPE_UNIVERSE
18	SCOPE_SITE     Scope = unix.RT_SCOPE_SITE
19	SCOPE_LINK     Scope = unix.RT_SCOPE_LINK
20	SCOPE_HOST     Scope = unix.RT_SCOPE_HOST
21	SCOPE_NOWHERE  Scope = unix.RT_SCOPE_NOWHERE
22)
23
// Filter mask bits for RouteListFiltered. Each bit enables comparison of the
// matching Route field against the filter. Values start at 1<<1; bit 0 is not
// assigned.
const (
	RT_FILTER_PROTOCOL uint64 = 1 << 1
	RT_FILTER_SCOPE    uint64 = 1 << 2
	RT_FILTER_TYPE     uint64 = 1 << 3
	RT_FILTER_TOS      uint64 = 1 << 4
	RT_FILTER_IIF      uint64 = 1 << 5
	RT_FILTER_OIF      uint64 = 1 << 6
	RT_FILTER_DST      uint64 = 1 << 7
	RT_FILTER_SRC      uint64 = 1 << 8
	RT_FILTER_GW       uint64 = 1 << 9
	RT_FILTER_TABLE    uint64 = 1 << 10
)
36
37const (
38	FLAG_ONLINK    NextHopFlag = unix.RTNH_F_ONLINK
39	FLAG_PERVASIVE NextHopFlag = unix.RTNH_F_PERVASIVE
40)
41
42var testFlags = []flagString{
43	{f: FLAG_ONLINK, s: "onlink"},
44	{f: FLAG_PERVASIVE, s: "pervasive"},
45}
46
47func listFlags(flag int) []string {
48	var flags []string
49	for _, tf := range testFlags {
50		if flag&int(tf.f) != 0 {
51			flags = append(flags, tf.s)
52		}
53	}
54	return flags
55}
56
57func (r *Route) ListFlags() []string {
58	return listFlags(r.Flags)
59}
60
61func (n *NexthopInfo) ListFlags() []string {
62	return listFlags(n.Flags)
63}
64
65type MPLSDestination struct {
66	Labels []int
67}
68
69func (d *MPLSDestination) Family() int {
70	return nl.FAMILY_MPLS
71}
72
73func (d *MPLSDestination) Decode(buf []byte) error {
74	d.Labels = nl.DecodeMPLSStack(buf)
75	return nil
76}
77
78func (d *MPLSDestination) Encode() ([]byte, error) {
79	return nl.EncodeMPLSStack(d.Labels...), nil
80}
81
82func (d *MPLSDestination) String() string {
83	s := make([]string, 0, len(d.Labels))
84	for _, l := range d.Labels {
85		s = append(s, fmt.Sprintf("%d", l))
86	}
87	return strings.Join(s, "/")
88}
89
90func (d *MPLSDestination) Equal(x Destination) bool {
91	o, ok := x.(*MPLSDestination)
92	if !ok {
93		return false
94	}
95	if d == nil && o == nil {
96		return true
97	}
98	if d == nil || o == nil {
99		return false
100	}
101	if d.Labels == nil && o.Labels == nil {
102		return true
103	}
104	if d.Labels == nil || o.Labels == nil {
105		return false
106	}
107	if len(d.Labels) != len(o.Labels) {
108		return false
109	}
110	for i := range d.Labels {
111		if d.Labels[i] != o.Labels[i] {
112			return false
113		}
114	}
115	return true
116}
117
118type MPLSEncap struct {
119	Labels []int
120}
121
122func (e *MPLSEncap) Type() int {
123	return nl.LWTUNNEL_ENCAP_MPLS
124}
125
126func (e *MPLSEncap) Decode(buf []byte) error {
127	if len(buf) < 4 {
128		return fmt.Errorf("lack of bytes")
129	}
130	native := nl.NativeEndian()
131	l := native.Uint16(buf)
132	if len(buf) < int(l) {
133		return fmt.Errorf("lack of bytes")
134	}
135	buf = buf[:l]
136	typ := native.Uint16(buf[2:])
137	if typ != nl.MPLS_IPTUNNEL_DST {
138		return fmt.Errorf("unknown MPLS Encap Type: %d", typ)
139	}
140	e.Labels = nl.DecodeMPLSStack(buf[4:])
141	return nil
142}
143
144func (e *MPLSEncap) Encode() ([]byte, error) {
145	s := nl.EncodeMPLSStack(e.Labels...)
146	native := nl.NativeEndian()
147	hdr := make([]byte, 4)
148	native.PutUint16(hdr, uint16(len(s)+4))
149	native.PutUint16(hdr[2:], nl.MPLS_IPTUNNEL_DST)
150	return append(hdr, s...), nil
151}
152
153func (e *MPLSEncap) String() string {
154	s := make([]string, 0, len(e.Labels))
155	for _, l := range e.Labels {
156		s = append(s, fmt.Sprintf("%d", l))
157	}
158	return strings.Join(s, "/")
159}
160
161func (e *MPLSEncap) Equal(x Encap) bool {
162	o, ok := x.(*MPLSEncap)
163	if !ok {
164		return false
165	}
166	if e == nil && o == nil {
167		return true
168	}
169	if e == nil || o == nil {
170		return false
171	}
172	if e.Labels == nil && o.Labels == nil {
173		return true
174	}
175	if e.Labels == nil || o.Labels == nil {
176		return false
177	}
178	if len(e.Labels) != len(o.Labels) {
179		return false
180	}
181	for i := range e.Labels {
182		if e.Labels[i] != o.Labels[i] {
183			return false
184		}
185	}
186	return true
187}
188
189// SEG6 definitions
190type SEG6Encap struct {
191	Mode     int
192	Segments []net.IP
193}
194
195func (e *SEG6Encap) Type() int {
196	return nl.LWTUNNEL_ENCAP_SEG6
197}
198func (e *SEG6Encap) Decode(buf []byte) error {
199	if len(buf) < 4 {
200		return fmt.Errorf("lack of bytes")
201	}
202	native := nl.NativeEndian()
203	// Get Length(l) & Type(typ) : 2 + 2 bytes
204	l := native.Uint16(buf)
205	if len(buf) < int(l) {
206		return fmt.Errorf("lack of bytes")
207	}
208	buf = buf[:l] // make sure buf size upper limit is Length
209	typ := native.Uint16(buf[2:])
210	if typ != nl.SEG6_IPTUNNEL_SRH {
211		return fmt.Errorf("unknown SEG6 Type: %d", typ)
212	}
213
214	var err error
215	e.Mode, e.Segments, err = nl.DecodeSEG6Encap(buf[4:])
216
217	return err
218}
219func (e *SEG6Encap) Encode() ([]byte, error) {
220	s, err := nl.EncodeSEG6Encap(e.Mode, e.Segments)
221	native := nl.NativeEndian()
222	hdr := make([]byte, 4)
223	native.PutUint16(hdr, uint16(len(s)+4))
224	native.PutUint16(hdr[2:], nl.SEG6_IPTUNNEL_SRH)
225	return append(hdr, s...), err
226}
227func (e *SEG6Encap) String() string {
228	segs := make([]string, 0, len(e.Segments))
229	// append segment backwards (from n to 0) since seg#0 is the last segment.
230	for i := len(e.Segments); i > 0; i-- {
231		segs = append(segs, fmt.Sprintf("%s", e.Segments[i-1]))
232	}
233	str := fmt.Sprintf("mode %s segs %d [ %s ]", nl.SEG6EncapModeString(e.Mode),
234		len(e.Segments), strings.Join(segs, " "))
235	return str
236}
237func (e *SEG6Encap) Equal(x Encap) bool {
238	o, ok := x.(*SEG6Encap)
239	if !ok {
240		return false
241	}
242	if e == o {
243		return true
244	}
245	if e == nil || o == nil {
246		return false
247	}
248	if e.Mode != o.Mode {
249		return false
250	}
251	if len(e.Segments) != len(o.Segments) {
252		return false
253	}
254	for i := range e.Segments {
255		if !e.Segments[i].Equal(o.Segments[i]) {
256			return false
257		}
258	}
259	return true
260}
261
262// RouteAdd will add a route to the system.
263// Equivalent to: `ip route add $route`
264func RouteAdd(route *Route) error {
265	return pkgHandle.RouteAdd(route)
266}
267
268// RouteAdd will add a route to the system.
269// Equivalent to: `ip route add $route`
270func (h *Handle) RouteAdd(route *Route) error {
271	flags := unix.NLM_F_CREATE | unix.NLM_F_EXCL | unix.NLM_F_ACK
272	req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags)
273	return h.routeHandle(route, req, nl.NewRtMsg())
274}
275
276// RouteReplace will add a route to the system.
277// Equivalent to: `ip route replace $route`
278func RouteReplace(route *Route) error {
279	return pkgHandle.RouteReplace(route)
280}
281
282// RouteReplace will add a route to the system.
283// Equivalent to: `ip route replace $route`
284func (h *Handle) RouteReplace(route *Route) error {
285	flags := unix.NLM_F_CREATE | unix.NLM_F_REPLACE | unix.NLM_F_ACK
286	req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags)
287	return h.routeHandle(route, req, nl.NewRtMsg())
288}
289
290// RouteDel will delete a route from the system.
291// Equivalent to: `ip route del $route`
292func RouteDel(route *Route) error {
293	return pkgHandle.RouteDel(route)
294}
295
296// RouteDel will delete a route from the system.
297// Equivalent to: `ip route del $route`
298func (h *Handle) RouteDel(route *Route) error {
299	req := h.newNetlinkRequest(unix.RTM_DELROUTE, unix.NLM_F_ACK)
300	return h.routeHandle(route, req, nl.NewRtDelMsg())
301}
302
303func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error {
304	if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil && route.MPLSDst == nil {
305		return fmt.Errorf("one of Dst.IP, Src, or Gw must not be nil")
306	}
307
308	family := -1
309	var rtAttrs []*nl.RtAttr
310
311	if route.Dst != nil && route.Dst.IP != nil {
312		dstLen, _ := route.Dst.Mask.Size()
313		msg.Dst_len = uint8(dstLen)
314		dstFamily := nl.GetIPFamily(route.Dst.IP)
315		family = dstFamily
316		var dstData []byte
317		if dstFamily == FAMILY_V4 {
318			dstData = route.Dst.IP.To4()
319		} else {
320			dstData = route.Dst.IP.To16()
321		}
322		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_DST, dstData))
323	} else if route.MPLSDst != nil {
324		family = nl.FAMILY_MPLS
325		msg.Dst_len = uint8(20)
326		msg.Type = unix.RTN_UNICAST
327		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_DST, nl.EncodeMPLSStack(*route.MPLSDst)))
328	}
329
330	if route.NewDst != nil {
331		if family != -1 && family != route.NewDst.Family() {
332			return fmt.Errorf("new destination and destination are not the same address family")
333		}
334		buf, err := route.NewDst.Encode()
335		if err != nil {
336			return err
337		}
338		rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_NEWDST, buf))
339	}
340
341	if route.Encap != nil {
342		buf := make([]byte, 2)
343		native.PutUint16(buf, uint16(route.Encap.Type()))
344		rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_ENCAP_TYPE, buf))
345		buf, err := route.Encap.Encode()
346		if err != nil {
347			return err
348		}
349		rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_ENCAP, buf))
350	}
351
352	if route.Src != nil {
353		srcFamily := nl.GetIPFamily(route.Src)
354		if family != -1 && family != srcFamily {
355			return fmt.Errorf("source and destination ip are not the same IP family")
356		}
357		family = srcFamily
358		var srcData []byte
359		if srcFamily == FAMILY_V4 {
360			srcData = route.Src.To4()
361		} else {
362			srcData = route.Src.To16()
363		}
364		// The commonly used src ip for routes is actually PREFSRC
365		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_PREFSRC, srcData))
366	}
367
368	if route.Gw != nil {
369		gwFamily := nl.GetIPFamily(route.Gw)
370		if family != -1 && family != gwFamily {
371			return fmt.Errorf("gateway, source, and destination ip are not the same IP family")
372		}
373		family = gwFamily
374		var gwData []byte
375		if gwFamily == FAMILY_V4 {
376			gwData = route.Gw.To4()
377		} else {
378			gwData = route.Gw.To16()
379		}
380		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_GATEWAY, gwData))
381	}
382
383	if len(route.MultiPath) > 0 {
384		buf := []byte{}
385		for _, nh := range route.MultiPath {
386			rtnh := &nl.RtNexthop{
387				RtNexthop: unix.RtNexthop{
388					Hops:    uint8(nh.Hops),
389					Ifindex: int32(nh.LinkIndex),
390					Flags:   uint8(nh.Flags),
391				},
392			}
393			children := []nl.NetlinkRequestData{}
394			if nh.Gw != nil {
395				gwFamily := nl.GetIPFamily(nh.Gw)
396				if family != -1 && family != gwFamily {
397					return fmt.Errorf("gateway, source, and destination ip are not the same IP family")
398				}
399				if gwFamily == FAMILY_V4 {
400					children = append(children, nl.NewRtAttr(unix.RTA_GATEWAY, []byte(nh.Gw.To4())))
401				} else {
402					children = append(children, nl.NewRtAttr(unix.RTA_GATEWAY, []byte(nh.Gw.To16())))
403				}
404			}
405			if nh.NewDst != nil {
406				if family != -1 && family != nh.NewDst.Family() {
407					return fmt.Errorf("new destination and destination are not the same address family")
408				}
409				buf, err := nh.NewDst.Encode()
410				if err != nil {
411					return err
412				}
413				children = append(children, nl.NewRtAttr(nl.RTA_NEWDST, buf))
414			}
415			if nh.Encap != nil {
416				buf := make([]byte, 2)
417				native.PutUint16(buf, uint16(nh.Encap.Type()))
418				rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_ENCAP_TYPE, buf))
419				buf, err := nh.Encap.Encode()
420				if err != nil {
421					return err
422				}
423				children = append(children, nl.NewRtAttr(nl.RTA_ENCAP, buf))
424			}
425			rtnh.Children = children
426			buf = append(buf, rtnh.Serialize()...)
427		}
428		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_MULTIPATH, buf))
429	}
430
431	if route.Table > 0 {
432		if route.Table >= 256 {
433			msg.Table = unix.RT_TABLE_UNSPEC
434			b := make([]byte, 4)
435			native.PutUint32(b, uint32(route.Table))
436			rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_TABLE, b))
437		} else {
438			msg.Table = uint8(route.Table)
439		}
440	}
441
442	if route.Priority > 0 {
443		b := make([]byte, 4)
444		native.PutUint32(b, uint32(route.Priority))
445		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_PRIORITY, b))
446	}
447	if route.Tos > 0 {
448		msg.Tos = uint8(route.Tos)
449	}
450	if route.Protocol > 0 {
451		msg.Protocol = uint8(route.Protocol)
452	}
453	if route.Type > 0 {
454		msg.Type = uint8(route.Type)
455	}
456
457	var metrics []*nl.RtAttr
458	// TODO: support other rta_metric values
459	if route.MTU > 0 {
460		b := nl.Uint32Attr(uint32(route.MTU))
461		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_MTU, b))
462	}
463	if route.AdvMSS > 0 {
464		b := nl.Uint32Attr(uint32(route.AdvMSS))
465		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_ADVMSS, b))
466	}
467
468	if metrics != nil {
469		attr := nl.NewRtAttr(unix.RTA_METRICS, nil)
470		for _, metric := range metrics {
471			attr.AddChild(metric)
472		}
473		rtAttrs = append(rtAttrs, attr)
474	}
475
476	msg.Flags = uint32(route.Flags)
477	msg.Scope = uint8(route.Scope)
478	msg.Family = uint8(family)
479	req.AddData(msg)
480	for _, attr := range rtAttrs {
481		req.AddData(attr)
482	}
483
484	var (
485		b      = make([]byte, 4)
486		native = nl.NativeEndian()
487	)
488	native.PutUint32(b, uint32(route.LinkIndex))
489
490	req.AddData(nl.NewRtAttr(unix.RTA_OIF, b))
491
492	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
493	return err
494}
495
496// RouteList gets a list of routes in the system.
497// Equivalent to: `ip route show`.
498// The list can be filtered by link and ip family.
499func RouteList(link Link, family int) ([]Route, error) {
500	return pkgHandle.RouteList(link, family)
501}
502
503// RouteList gets a list of routes in the system.
504// Equivalent to: `ip route show`.
505// The list can be filtered by link and ip family.
506func (h *Handle) RouteList(link Link, family int) ([]Route, error) {
507	var routeFilter *Route
508	if link != nil {
509		routeFilter = &Route{
510			LinkIndex: link.Attrs().Index,
511		}
512	}
513	return h.RouteListFiltered(family, routeFilter, RT_FILTER_OIF)
514}
515
516// RouteListFiltered gets a list of routes in the system filtered with specified rules.
517// All rules must be defined in RouteFilter struct
518func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) {
519	return pkgHandle.RouteListFiltered(family, filter, filterMask)
520}
521
522// RouteListFiltered gets a list of routes in the system filtered with specified rules.
523// All rules must be defined in RouteFilter struct
524func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) {
525	req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_DUMP)
526	infmsg := nl.NewIfInfomsg(family)
527	req.AddData(infmsg)
528
529	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE)
530	if err != nil {
531		return nil, err
532	}
533
534	var res []Route
535	for _, m := range msgs {
536		msg := nl.DeserializeRtMsg(m)
537		if msg.Flags&unix.RTM_F_CLONED != 0 {
538			// Ignore cloned routes
539			continue
540		}
541		if msg.Table != unix.RT_TABLE_MAIN {
542			if filter == nil || filter != nil && filterMask&RT_FILTER_TABLE == 0 {
543				// Ignore non-main tables
544				continue
545			}
546		}
547		route, err := deserializeRoute(m)
548		if err != nil {
549			return nil, err
550		}
551		if filter != nil {
552			switch {
553			case filterMask&RT_FILTER_TABLE != 0 && filter.Table != unix.RT_TABLE_UNSPEC && route.Table != filter.Table:
554				continue
555			case filterMask&RT_FILTER_PROTOCOL != 0 && route.Protocol != filter.Protocol:
556				continue
557			case filterMask&RT_FILTER_SCOPE != 0 && route.Scope != filter.Scope:
558				continue
559			case filterMask&RT_FILTER_TYPE != 0 && route.Type != filter.Type:
560				continue
561			case filterMask&RT_FILTER_TOS != 0 && route.Tos != filter.Tos:
562				continue
563			case filterMask&RT_FILTER_OIF != 0 && route.LinkIndex != filter.LinkIndex:
564				continue
565			case filterMask&RT_FILTER_IIF != 0 && route.ILinkIndex != filter.ILinkIndex:
566				continue
567			case filterMask&RT_FILTER_GW != 0 && !route.Gw.Equal(filter.Gw):
568				continue
569			case filterMask&RT_FILTER_SRC != 0 && !route.Src.Equal(filter.Src):
570				continue
571			case filterMask&RT_FILTER_DST != 0:
572				if filter.MPLSDst == nil || route.MPLSDst == nil || (*filter.MPLSDst) != (*route.MPLSDst) {
573					if !ipNetEqual(route.Dst, filter.Dst) {
574						continue
575					}
576				}
577			}
578		}
579		res = append(res, route)
580	}
581	return res, nil
582}
583
584// deserializeRoute decodes a binary netlink message into a Route struct
585func deserializeRoute(m []byte) (Route, error) {
586	msg := nl.DeserializeRtMsg(m)
587	attrs, err := nl.ParseRouteAttr(m[msg.Len():])
588	if err != nil {
589		return Route{}, err
590	}
591	route := Route{
592		Scope:    Scope(msg.Scope),
593		Protocol: int(msg.Protocol),
594		Table:    int(msg.Table),
595		Type:     int(msg.Type),
596		Tos:      int(msg.Tos),
597		Flags:    int(msg.Flags),
598	}
599
600	native := nl.NativeEndian()
601	var encap, encapType syscall.NetlinkRouteAttr
602	for _, attr := range attrs {
603		switch attr.Attr.Type {
604		case unix.RTA_GATEWAY:
605			route.Gw = net.IP(attr.Value)
606		case unix.RTA_PREFSRC:
607			route.Src = net.IP(attr.Value)
608		case unix.RTA_DST:
609			if msg.Family == nl.FAMILY_MPLS {
610				stack := nl.DecodeMPLSStack(attr.Value)
611				if len(stack) == 0 || len(stack) > 1 {
612					return route, fmt.Errorf("invalid MPLS RTA_DST")
613				}
614				route.MPLSDst = &stack[0]
615			} else {
616				route.Dst = &net.IPNet{
617					IP:   attr.Value,
618					Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
619				}
620			}
621		case unix.RTA_OIF:
622			route.LinkIndex = int(native.Uint32(attr.Value[0:4]))
623		case unix.RTA_IIF:
624			route.ILinkIndex = int(native.Uint32(attr.Value[0:4]))
625		case unix.RTA_PRIORITY:
626			route.Priority = int(native.Uint32(attr.Value[0:4]))
627		case unix.RTA_TABLE:
628			route.Table = int(native.Uint32(attr.Value[0:4]))
629		case unix.RTA_MULTIPATH:
630			parseRtNexthop := func(value []byte) (*NexthopInfo, []byte, error) {
631				if len(value) < unix.SizeofRtNexthop {
632					return nil, nil, fmt.Errorf("lack of bytes")
633				}
634				nh := nl.DeserializeRtNexthop(value)
635				if len(value) < int(nh.RtNexthop.Len) {
636					return nil, nil, fmt.Errorf("lack of bytes")
637				}
638				info := &NexthopInfo{
639					LinkIndex: int(nh.RtNexthop.Ifindex),
640					Hops:      int(nh.RtNexthop.Hops),
641					Flags:     int(nh.RtNexthop.Flags),
642				}
643				attrs, err := nl.ParseRouteAttr(value[unix.SizeofRtNexthop:int(nh.RtNexthop.Len)])
644				if err != nil {
645					return nil, nil, err
646				}
647				var encap, encapType syscall.NetlinkRouteAttr
648				for _, attr := range attrs {
649					switch attr.Attr.Type {
650					case unix.RTA_GATEWAY:
651						info.Gw = net.IP(attr.Value)
652					case nl.RTA_NEWDST:
653						var d Destination
654						switch msg.Family {
655						case nl.FAMILY_MPLS:
656							d = &MPLSDestination{}
657						}
658						if err := d.Decode(attr.Value); err != nil {
659							return nil, nil, err
660						}
661						info.NewDst = d
662					case nl.RTA_ENCAP_TYPE:
663						encapType = attr
664					case nl.RTA_ENCAP:
665						encap = attr
666					}
667				}
668
669				if len(encap.Value) != 0 && len(encapType.Value) != 0 {
670					typ := int(native.Uint16(encapType.Value[0:2]))
671					var e Encap
672					switch typ {
673					case nl.LWTUNNEL_ENCAP_MPLS:
674						e = &MPLSEncap{}
675						if err := e.Decode(encap.Value); err != nil {
676							return nil, nil, err
677						}
678					}
679					info.Encap = e
680				}
681
682				return info, value[int(nh.RtNexthop.Len):], nil
683			}
684			rest := attr.Value
685			for len(rest) > 0 {
686				info, buf, err := parseRtNexthop(rest)
687				if err != nil {
688					return route, err
689				}
690				route.MultiPath = append(route.MultiPath, info)
691				rest = buf
692			}
693		case nl.RTA_NEWDST:
694			var d Destination
695			switch msg.Family {
696			case nl.FAMILY_MPLS:
697				d = &MPLSDestination{}
698			}
699			if err := d.Decode(attr.Value); err != nil {
700				return route, err
701			}
702			route.NewDst = d
703		case nl.RTA_ENCAP_TYPE:
704			encapType = attr
705		case nl.RTA_ENCAP:
706			encap = attr
707		case unix.RTA_METRICS:
708			metrics, err := nl.ParseRouteAttr(attr.Value)
709			if err != nil {
710				return route, err
711			}
712			for _, metric := range metrics {
713				switch metric.Attr.Type {
714				case unix.RTAX_MTU:
715					route.MTU = int(native.Uint32(metric.Value[0:4]))
716				case unix.RTAX_ADVMSS:
717					route.AdvMSS = int(native.Uint32(metric.Value[0:4]))
718				}
719			}
720		}
721	}
722
723	if len(encap.Value) != 0 && len(encapType.Value) != 0 {
724		typ := int(native.Uint16(encapType.Value[0:2]))
725		var e Encap
726		switch typ {
727		case nl.LWTUNNEL_ENCAP_MPLS:
728			e = &MPLSEncap{}
729			if err := e.Decode(encap.Value); err != nil {
730				return route, err
731			}
732		case nl.LWTUNNEL_ENCAP_SEG6:
733			e = &SEG6Encap{}
734			if err := e.Decode(encap.Value); err != nil {
735				return route, err
736			}
737		}
738		route.Encap = e
739	}
740
741	return route, nil
742}
743
744// RouteGet gets a route to a specific destination from the host system.
745// Equivalent to: 'ip route get'.
746func RouteGet(destination net.IP) ([]Route, error) {
747	return pkgHandle.RouteGet(destination)
748}
749
750// RouteGet gets a route to a specific destination from the host system.
751// Equivalent to: 'ip route get'.
752func (h *Handle) RouteGet(destination net.IP) ([]Route, error) {
753	req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_REQUEST)
754	family := nl.GetIPFamily(destination)
755	var destinationData []byte
756	var bitlen uint8
757	if family == FAMILY_V4 {
758		destinationData = destination.To4()
759		bitlen = 32
760	} else {
761		destinationData = destination.To16()
762		bitlen = 128
763	}
764	msg := &nl.RtMsg{}
765	msg.Family = uint8(family)
766	msg.Dst_len = bitlen
767	req.AddData(msg)
768
769	rtaDst := nl.NewRtAttr(unix.RTA_DST, destinationData)
770	req.AddData(rtaDst)
771
772	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE)
773	if err != nil {
774		return nil, err
775	}
776
777	var res []Route
778	for _, m := range msgs {
779		route, err := deserializeRoute(m)
780		if err != nil {
781			return nil, err
782		}
783		res = append(res, route)
784	}
785	return res, nil
786
787}
788
789// RouteSubscribe takes a chan down which notifications will be sent
790// when routes are added or deleted. Close the 'done' chan to stop subscription.
791func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error {
792	return routeSubscribeAt(netns.None(), netns.None(), ch, done, nil, false)
793}
794
795// RouteSubscribeAt works like RouteSubscribe plus it allows the caller
796// to choose the network namespace in which to subscribe (ns).
797func RouteSubscribeAt(ns netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error {
798	return routeSubscribeAt(ns, netns.None(), ch, done, nil, false)
799}
800
// RouteSubscribeOptions contains a set of options to use with
// RouteSubscribeWithOptions.
type RouteSubscribeOptions struct {
	// Namespace is the network namespace to subscribe in. When nil,
	// RouteSubscribeWithOptions uses netns.None().
	Namespace *netns.NsHandle
	// ErrorCallback, when not nil, is called with the error that makes the
	// subscription stop.
	ErrorCallback func(error)
	// ListExisting requests a dump of the existing routes on the
	// subscription socket right after subscribing.
	ListExisting bool
}
808
809// RouteSubscribeWithOptions work like RouteSubscribe but enable to
810// provide additional options to modify the behavior. Currently, the
811// namespace can be provided as well as an error callback.
812func RouteSubscribeWithOptions(ch chan<- RouteUpdate, done <-chan struct{}, options RouteSubscribeOptions) error {
813	if options.Namespace == nil {
814		none := netns.None()
815		options.Namespace = &none
816	}
817	return routeSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting)
818}
819
820func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error {
821	s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_ROUTE, unix.RTNLGRP_IPV6_ROUTE)
822	if err != nil {
823		return err
824	}
825	if done != nil {
826		go func() {
827			<-done
828			s.Close()
829		}()
830	}
831	if listExisting {
832		req := pkgHandle.newNetlinkRequest(unix.RTM_GETROUTE,
833			unix.NLM_F_DUMP)
834		infmsg := nl.NewIfInfomsg(unix.AF_UNSPEC)
835		req.AddData(infmsg)
836		if err := s.Send(req); err != nil {
837			return err
838		}
839	}
840	go func() {
841		defer close(ch)
842		for {
843			msgs, err := s.Receive()
844			if err != nil {
845				if cberr != nil {
846					cberr(err)
847				}
848				return
849			}
850			for _, m := range msgs {
851				if m.Header.Type == unix.NLMSG_DONE {
852					continue
853				}
854				if m.Header.Type == unix.NLMSG_ERROR {
855					native := nl.NativeEndian()
856					error := int32(native.Uint32(m.Data[0:4]))
857					if error == 0 {
858						continue
859					}
860					if cberr != nil {
861						cberr(syscall.Errno(-error))
862					}
863					return
864				}
865				route, err := deserializeRoute(m.Data)
866				if err != nil {
867					if cberr != nil {
868						cberr(err)
869					}
870					return
871				}
872				ch <- RouteUpdate{Type: m.Header.Type, Route: route}
873			}
874		}
875	}()
876
877	return nil
878}
879