1package rtnetlink
2
3import (
4	"errors"
5	"fmt"
6	"net"
7
8	"github.com/jsimonetti/rtnetlink/internal/unix"
9
10	"github.com/mdlayher/netlink"
11)
12
var (
	// errInvalidLinkMessage is returned by LinkMessage.UnmarshalBinary when the
	// input is shorter than the fixed-size link message header.
	errInvalidLinkMessage = errors.New("rtnetlink LinkMessage is invalid or too short")

	// errInvalidLinkMessageAttr is returned while decoding link attributes when
	// a hardware or broadcast address payload is shorter than 4 bytes or longer
	// than 32 bytes.
	errInvalidLinkMessageAttr = errors.New("rtnetlink LinkMessage has a wrong attribute data length")
)
20
// Compile-time check that *LinkMessage implements the Message interface.
var _ Message = &LinkMessage{}
22
// A LinkMessage is a route netlink link message.
//
// It consists of a fixed-size header (Family, Type, Index, Flags, Change)
// optionally followed by a list of attributes.
type LinkMessage struct {
	// Always set to AF_UNSPEC (0). MarshalBinary writes zero for the family
	// regardless of the value stored here; UnmarshalBinary fills it from the
	// first two bytes of the message.
	Family uint16

	// Device Type
	Type uint16

	// Unique interface index, using a nonzero value with
	// NewLink will instruct the kernel to create a
	// device with the given index (kernel 3.7+ required)
	Index uint32

	// Contains device flags, see netdevice(7)
	Flags uint32

	// Change Flags, specifies which flags will be affected by the Flags field
	Change uint32

	// Attributes List. Nil means no attributes are encoded; after
	// unmarshalling it is only set when bytes follow the fixed header.
	Attributes *LinkAttributes
}
45
46// MarshalBinary marshals a LinkMessage into a byte slice.
47func (m *LinkMessage) MarshalBinary() ([]byte, error) {
48	b := make([]byte, unix.SizeofIfInfomsg)
49
50	b[0] = 0 // Family
51	b[1] = 0 // reserved
52	nativeEndian.PutUint16(b[2:4], m.Type)
53	nativeEndian.PutUint32(b[4:8], m.Index)
54	nativeEndian.PutUint32(b[8:12], m.Flags)
55	nativeEndian.PutUint32(b[12:16], m.Change)
56
57	if m.Attributes != nil {
58		ae := netlink.NewAttributeEncoder()
59		ae.ByteOrder = nativeEndian
60		err := m.Attributes.encode(ae)
61		if err != nil {
62			return nil, err
63		}
64
65		a, err := ae.Encode()
66		if err != nil {
67			return nil, err
68		}
69
70		return append(b, a...), nil
71	}
72
73	return b, nil
74}
75
76// UnmarshalBinary unmarshals the contents of a byte slice into a LinkMessage.
77func (m *LinkMessage) UnmarshalBinary(b []byte) error {
78	l := len(b)
79	if l < unix.SizeofIfInfomsg {
80		return errInvalidLinkMessage
81	}
82
83	m.Family = nativeEndian.Uint16(b[0:2])
84	m.Type = nativeEndian.Uint16(b[2:4])
85	m.Index = nativeEndian.Uint32(b[4:8])
86	m.Flags = nativeEndian.Uint32(b[8:12])
87	m.Change = nativeEndian.Uint32(b[12:16])
88
89	if l > unix.SizeofIfInfomsg {
90		m.Attributes = &LinkAttributes{}
91		ad, err := netlink.NewAttributeDecoder(b[16:])
92		if err != nil {
93			return err
94		}
95		ad.ByteOrder = nativeEndian
96		err = m.Attributes.decode(ad)
97		if err != nil {
98			return err
99		}
100	}
101
102	return nil
103}
104
// rtMessage is an empty method to satisfy the Message interface.
func (*LinkMessage) rtMessage() {}
107
// LinkService is used to retrieve rtnetlink family information.
//
// Its methods create, delete, fetch, modify and list network links by
// sending LinkMessage requests over the wrapped Conn.
type LinkService struct {
	// c is the connection every request is executed on.
	c *Conn
}
112
113// execute executes the request and returns the messages as a LinkMessage slice
114func (l *LinkService) execute(m Message, family uint16, flags netlink.HeaderFlags) ([]LinkMessage, error) {
115	msgs, err := l.c.Execute(m, family, flags)
116
117	links := make([]LinkMessage, len(msgs))
118	for i := range msgs {
119		links[i] = *msgs[i].(*LinkMessage)
120	}
121
122	return links, err
123}
124
125// New creates a new interface using the LinkMessage information.
126func (l *LinkService) New(req *LinkMessage) error {
127	flags := netlink.Request | netlink.Create | netlink.Acknowledge | netlink.Excl
128	_, err := l.execute(req, unix.RTM_NEWLINK, flags)
129
130	return err
131}
132
133// Delete removes an interface by index.
134func (l *LinkService) Delete(index uint32) error {
135	req := &LinkMessage{
136		Index: index,
137	}
138
139	flags := netlink.Request | netlink.Acknowledge
140	_, err := l.c.Execute(req, unix.RTM_DELLINK, flags)
141
142	return err
143}
144
145// Get retrieves interface information by index.
146func (l *LinkService) Get(index uint32) (LinkMessage, error) {
147	req := &LinkMessage{
148		Index: index,
149	}
150
151	flags := netlink.Request | netlink.DumpFiltered
152	links, err := l.execute(req, unix.RTM_GETLINK, flags)
153
154	if len(links) != 1 {
155		return LinkMessage{}, fmt.Errorf("too many/little matches, expected 1, actual %d", len(links))
156	}
157
158	return links[0], err
159}
160
161// Set sets interface attributes according to the LinkMessage information.
162//
163// ref: https://lwn.net/Articles/236919/
164// We explicitly use RTM_NEWLINK to set link attributes instead of
165// RTM_SETLINK because:
166// - using RTM_SETLINK is actually an old rtnetlink API, not supporting most
167//   attributes common today
168// - using RTM_NEWLINK is the prefered way to create AND update links
169// - RTM_NEWLINK is backward compatible to RTM_SETLINK
170func (l *LinkService) Set(req *LinkMessage) error {
171	flags := netlink.Request | netlink.Acknowledge
172	_, err := l.c.Execute(req, unix.RTM_NEWLINK, flags)
173
174	return err
175}
176
177func (l *LinkService) list(kind string) ([]LinkMessage, error) {
178	req := &LinkMessage{}
179	if kind != "" {
180		req.Attributes = &LinkAttributes{
181			Info: &LinkInfo{Kind: kind},
182		}
183	}
184
185	flags := netlink.Request | netlink.Dump
186	return l.execute(req, unix.RTM_GETLINK, flags)
187}
188
// ListByKind retrieves all interfaces of a specific kind.
//
// The kind is sent as the link info kind attribute of the dump request. An
// empty kind sends no attributes, which makes the call equivalent to List.
func (l *LinkService) ListByKind(kind string) ([]LinkMessage, error) {
	return l.list(kind)
}
193
// List retrieves all interfaces.
//
// It issues the dump request without any attributes.
func (l *LinkService) List() ([]LinkMessage, error) {
	return l.list("")
}
198
// LinkAttributes contains all attributes for an interface.
//
// When encoding, Name, Type and QueueDisc are always emitted; MTU, Address,
// Broadcast and OperationalState are emitted only when non-zero/non-empty;
// Info, XDP and Master are emitted only when non-nil. Stats and Stats64 are
// decode-only.
type LinkAttributes struct {
	Address          net.HardwareAddr // Interface L2 address
	Broadcast        net.HardwareAddr // L2 broadcast address
	Name             string           // Device name
	MTU              uint32           // MTU of the device
	Type             uint32           // Link type (carried in the IFLA_LINK attribute)
	QueueDisc        string           // Queueing discipline
	Master           *uint32          // Master device index (0 value un-enslaves)
	OperationalState OperationalState // Interface operation state
	Stats            *LinkStats       // Interface Statistics
	Stats64          *LinkStats64     // Interface Statistics (64 bits version)
	Info             *LinkInfo        // Detailed Interface Information
	XDP              *LinkXDP         // Express Data Path Information
}
214
// OperationalState represents an interface's operational state.
//
// It is carried as a single byte in the IFLA_OPERSTATE attribute.
type OperationalState uint8
217
// Constants that represent operational state of an interface
//
// Adapted from https://elixir.bootlin.com/linux/v4.19.2/source/include/uapi/linux/if.h#L166
//
// OperStateUnknown is the zero value; LinkAttributes does not encode the
// operational state attribute when the state is OperStateUnknown.
const (
	OperStateUnknown        OperationalState = iota // status could not be determined
	OperStateNotPresent                             // down, due to some missing component (typically hardware)
	OperStateDown                                   // down, either administratively or due to a fault
	OperStateLowerLayerDown                         // down, due to lower-layer interfaces
	OperStateTesting                                // operationally down, in some test mode
	OperStateDormant                                // down, waiting for some external event
	OperStateUp                                     // interface is in a state to send and receive packets
)
230
231// unmarshalBinary unmarshals the contents of a byte slice into a LinkMessage.
232func (a *LinkAttributes) decode(ad *netlink.AttributeDecoder) error {
233	for ad.Next() {
234		switch ad.Type() {
235		case unix.IFLA_UNSPEC:
236			// unused attribute
237		case unix.IFLA_ADDRESS:
238			l := len(ad.Bytes())
239			if l < 4 || l > 32 {
240				return errInvalidLinkMessageAttr
241			}
242			a.Address = ad.Bytes()
243		case unix.IFLA_BROADCAST:
244			l := len(ad.Bytes())
245			if l < 4 || l > 32 {
246				return errInvalidLinkMessageAttr
247			}
248			a.Broadcast = ad.Bytes()
249		case unix.IFLA_IFNAME:
250			a.Name = ad.String()
251		case unix.IFLA_MTU:
252			a.MTU = ad.Uint32()
253		case unix.IFLA_LINK:
254			a.Type = ad.Uint32()
255		case unix.IFLA_QDISC:
256			a.QueueDisc = ad.String()
257		case unix.IFLA_OPERSTATE:
258			a.OperationalState = OperationalState(ad.Uint8())
259		case unix.IFLA_STATS:
260			a.Stats = &LinkStats{}
261			err := a.Stats.unmarshalBinary(ad.Bytes())
262			if err != nil {
263				return err
264			}
265		case unix.IFLA_STATS64:
266			a.Stats64 = &LinkStats64{}
267			err := a.Stats64.unmarshalBinary(ad.Bytes())
268			if err != nil {
269				return err
270			}
271		case unix.IFLA_LINKINFO:
272			a.Info = &LinkInfo{}
273			ad.Nested(a.Info.decode)
274		case unix.IFLA_MASTER:
275			v := ad.Uint32()
276			a.Master = &v
277		case unix.IFLA_XDP:
278			a.XDP = &LinkXDP{}
279			ad.Nested(a.XDP.decode)
280		}
281	}
282
283	return nil
284}
285
286// MarshalBinary marshals a LinkAttributes into a byte slice.
287func (a *LinkAttributes) encode(ae *netlink.AttributeEncoder) error {
288	ae.Uint16(unix.IFLA_UNSPEC, 0)
289	ae.String(unix.IFLA_IFNAME, a.Name)
290	ae.Uint32(unix.IFLA_LINK, a.Type)
291	ae.String(unix.IFLA_QDISC, a.QueueDisc)
292
293	if a.MTU != 0 {
294		ae.Uint32(unix.IFLA_MTU, a.MTU)
295	}
296
297	if len(a.Address) != 0 {
298		ae.Bytes(unix.IFLA_ADDRESS, a.Address)
299	}
300
301	if len(a.Broadcast) != 0 {
302		ae.Bytes(unix.IFLA_BROADCAST, a.Broadcast)
303	}
304
305	if a.OperationalState != OperStateUnknown {
306		ae.Uint8(unix.IFLA_OPERSTATE, uint8(a.OperationalState))
307	}
308
309	if a.Info != nil {
310		nae := netlink.NewAttributeEncoder()
311		nae.ByteOrder = ae.ByteOrder
312
313		err := a.Info.encode(nae)
314		if err != nil {
315			return err
316		}
317		b, err := nae.Encode()
318		if err != nil {
319			return err
320		}
321		ae.Bytes(unix.IFLA_LINKINFO, b)
322	}
323
324	if a.XDP != nil {
325		nae := netlink.NewAttributeEncoder()
326		nae.ByteOrder = ae.ByteOrder
327
328		err := a.XDP.encode(nae)
329		if err != nil {
330			return err
331		}
332		b, err := nae.Encode()
333		if err != nil {
334			return err
335		}
336
337		ae.Bytes(unix.IFLA_XDP, b)
338	}
339
340	if a.Master != nil {
341		ae.Uint32(unix.IFLA_MASTER, *a.Master)
342	}
343
344	return nil
345}
346
// LinkStats contains packet statistics
//
// All counters are 32 bits wide; the struct is filled from the payload of the
// IFLA_STATS attribute.
type LinkStats struct {
	RXPackets  uint32 // total packets received
	TXPackets  uint32 // total packets transmitted
	RXBytes    uint32 // total bytes received
	TXBytes    uint32 // total bytes transmitted
	RXErrors   uint32 // bad packets received
	TXErrors   uint32 // packet transmit problems
	RXDropped  uint32 // no space in linux buffers
	TXDropped  uint32 // no space available in linux
	Multicast  uint32 // multicast packets received
	Collisions uint32

	// detailed rx_errors:
	RXLengthErrors uint32
	RXOverErrors   uint32 // receiver ring buff overflow
	RXCRCErrors    uint32 // recved pkt with crc error
	RXFrameErrors  uint32 // recv'd frame alignment error
	RXFIFOErrors   uint32 // recv'r fifo overrun
	RXMissedErrors uint32 // receiver missed packet

	// detailed tx_errors
	TXAbortedErrors   uint32
	TXCarrierErrors   uint32
	TXFIFOErrors      uint32
	TXHeartbeatErrors uint32
	TXWindowErrors    uint32

	// for cslip etc
	RXCompressed uint32
	TXCompressed uint32

	// Only populated when the payload is long enough to include it.
	RXNoHandler uint32 // dropped, no handler found
}
381
382// unmarshalBinary unmarshals the contents of a byte slice into a LinkMessage.
383func (a *LinkStats) unmarshalBinary(b []byte) error {
384	l := len(b)
385	if l != 92 && l != 96 {
386		return fmt.Errorf("incorrect size, want: 92 or 96")
387	}
388
389	a.RXPackets = nativeEndian.Uint32(b[0:4])
390	a.TXPackets = nativeEndian.Uint32(b[4:8])
391	a.RXBytes = nativeEndian.Uint32(b[8:12])
392	a.TXBytes = nativeEndian.Uint32(b[12:16])
393	a.RXErrors = nativeEndian.Uint32(b[16:20])
394	a.TXErrors = nativeEndian.Uint32(b[20:24])
395	a.RXDropped = nativeEndian.Uint32(b[24:28])
396	a.TXDropped = nativeEndian.Uint32(b[28:32])
397	a.Multicast = nativeEndian.Uint32(b[32:36])
398	a.Collisions = nativeEndian.Uint32(b[36:40])
399
400	a.RXLengthErrors = nativeEndian.Uint32(b[40:44])
401	a.RXOverErrors = nativeEndian.Uint32(b[44:48])
402	a.RXCRCErrors = nativeEndian.Uint32(b[48:52])
403	a.RXFrameErrors = nativeEndian.Uint32(b[52:56])
404	a.RXFIFOErrors = nativeEndian.Uint32(b[56:60])
405	a.RXMissedErrors = nativeEndian.Uint32(b[60:64])
406
407	a.TXAbortedErrors = nativeEndian.Uint32(b[64:68])
408	a.TXCarrierErrors = nativeEndian.Uint32(b[68:72])
409	a.TXFIFOErrors = nativeEndian.Uint32(b[72:76])
410	a.TXHeartbeatErrors = nativeEndian.Uint32(b[76:80])
411	a.TXWindowErrors = nativeEndian.Uint32(b[80:84])
412
413	a.RXCompressed = nativeEndian.Uint32(b[84:88])
414	a.TXCompressed = nativeEndian.Uint32(b[88:92])
415
416	if l == 96 {
417		a.RXNoHandler = nativeEndian.Uint32(b[92:96])
418	}
419
420	return nil
421}
422
// LinkStats64 contains packet statistics
//
// All counters are 64 bits wide; the struct is filled from the payload of the
// IFLA_STATS64 attribute.
type LinkStats64 struct {
	RXPackets  uint64 // total packets received
	TXPackets  uint64 // total packets transmitted
	RXBytes    uint64 // total bytes received
	TXBytes    uint64 // total bytes transmitted
	RXErrors   uint64 // bad packets received
	TXErrors   uint64 // packet transmit problems
	RXDropped  uint64 // no space in linux buffers
	TXDropped  uint64 // no space available in linux
	Multicast  uint64 // multicast packets received
	Collisions uint64

	// detailed rx_errors:
	RXLengthErrors uint64
	RXOverErrors   uint64 // receiver ring buff overflow
	RXCRCErrors    uint64 // recved pkt with crc error
	RXFrameErrors  uint64 // recv'd frame alignment error
	RXFIFOErrors   uint64 // recv'r fifo overrun
	RXMissedErrors uint64 // receiver missed packet

	// detailed tx_errors
	TXAbortedErrors   uint64
	TXCarrierErrors   uint64
	TXFIFOErrors      uint64
	TXHeartbeatErrors uint64
	TXWindowErrors    uint64

	// for cslip etc
	RXCompressed uint64
	TXCompressed uint64

	// Only populated when the payload is long enough to include it.
	RXNoHandler uint64 // dropped, no handler found
}
457
458// unmarshalBinary unmarshals the contents of a byte slice into a LinkMessage.
459func (a *LinkStats64) unmarshalBinary(b []byte) error {
460	l := len(b)
461	if l != 184 && l != 192 {
462		return fmt.Errorf("incorrect size, want: 184 or 192")
463	}
464
465	a.RXPackets = nativeEndian.Uint64(b[0:8])
466	a.TXPackets = nativeEndian.Uint64(b[8:16])
467	a.RXBytes = nativeEndian.Uint64(b[16:24])
468	a.TXBytes = nativeEndian.Uint64(b[24:32])
469	a.RXErrors = nativeEndian.Uint64(b[32:40])
470	a.TXErrors = nativeEndian.Uint64(b[40:48])
471	a.RXDropped = nativeEndian.Uint64(b[48:56])
472	a.TXDropped = nativeEndian.Uint64(b[56:64])
473	a.Multicast = nativeEndian.Uint64(b[64:72])
474	a.Collisions = nativeEndian.Uint64(b[72:80])
475
476	a.RXLengthErrors = nativeEndian.Uint64(b[80:88])
477	a.RXOverErrors = nativeEndian.Uint64(b[88:96])
478	a.RXCRCErrors = nativeEndian.Uint64(b[96:104])
479	a.RXFrameErrors = nativeEndian.Uint64(b[104:112])
480	a.RXFIFOErrors = nativeEndian.Uint64(b[112:120])
481	a.RXMissedErrors = nativeEndian.Uint64(b[120:128])
482
483	a.TXAbortedErrors = nativeEndian.Uint64(b[128:136])
484	a.TXCarrierErrors = nativeEndian.Uint64(b[136:144])
485	a.TXFIFOErrors = nativeEndian.Uint64(b[144:152])
486	a.TXHeartbeatErrors = nativeEndian.Uint64(b[152:160])
487	a.TXWindowErrors = nativeEndian.Uint64(b[160:168])
488
489	a.RXCompressed = nativeEndian.Uint64(b[168:176])
490	a.TXCompressed = nativeEndian.Uint64(b[176:184])
491
492	if l == 192 {
493		a.RXNoHandler = nativeEndian.Uint64(b[184:192])
494	}
495
496	return nil
497}
498
// LinkInfo contains data for specific network types
//
// It is carried as the nested IFLA_LINKINFO attribute. When encoding, Kind
// and Data are always written, while SlaveKind and SlaveData are written only
// when SlaveData is non-empty.
type LinkInfo struct {
	Kind      string // Driver name
	Data      []byte // Driver specific configuration stored as nested Netlink messages
	SlaveKind string // Slave driver name
	SlaveData []byte // Slave driver specific configuration
}
506
507func (i *LinkInfo) decode(ad *netlink.AttributeDecoder) error {
508	for ad.Next() {
509		switch ad.Type() {
510		case unix.IFLA_INFO_KIND:
511			i.Kind = ad.String()
512		case unix.IFLA_INFO_SLAVE_KIND:
513			i.SlaveKind = ad.String()
514		case unix.IFLA_INFO_DATA:
515			i.Data = ad.Bytes()
516		case unix.IFLA_INFO_SLAVE_DATA:
517			i.SlaveData = ad.Bytes()
518		}
519	}
520
521	return nil
522}
523
524func (i *LinkInfo) encode(ae *netlink.AttributeEncoder) error {
525	ae.String(unix.IFLA_INFO_KIND, i.Kind)
526	ae.Bytes(unix.IFLA_INFO_DATA, i.Data)
527
528	if len(i.SlaveData) > 0 {
529		ae.String(unix.IFLA_INFO_SLAVE_KIND, i.SlaveKind)
530		ae.Bytes(unix.IFLA_INFO_SLAVE_DATA, i.SlaveData)
531	}
532
533	return nil
534}
535
// LinkXDP holds Express Data Path specific information
//
// It is carried as the nested IFLA_XDP attribute. FD, ExpectedFD and Flags
// are written when encoding; Attached and ProgID are only ever decoded.
type LinkXDP struct {
	FD         int32
	ExpectedFD int32
	Attached   uint8
	Flags      uint32
	ProgID     uint32
}
544
545func (xdp *LinkXDP) decode(ad *netlink.AttributeDecoder) error {
546	for ad.Next() {
547		switch ad.Type() {
548		case unix.IFLA_XDP_FD:
549			xdp.FD = ad.Int32()
550		case unix.IFLA_XDP_EXPECTED_FD:
551			xdp.ExpectedFD = ad.Int32()
552		case unix.IFLA_XDP_ATTACHED:
553			xdp.Attached = ad.Uint8()
554		case unix.IFLA_XDP_FLAGS:
555			xdp.Flags = ad.Uint32()
556		case unix.IFLA_XDP_PROG_ID:
557			xdp.ProgID = ad.Uint32()
558		}
559	}
560	return nil
561}
562
// encode writes the file descriptor, expected file descriptor and flags of
// the LinkXDP into ae. It always returns nil.
func (xdp *LinkXDP) encode(ae *netlink.AttributeEncoder) error {
	ae.Int32(unix.IFLA_XDP_FD, xdp.FD)
	ae.Int32(unix.IFLA_XDP_EXPECTED_FD, xdp.ExpectedFD)
	ae.Uint32(unix.IFLA_XDP_FLAGS, xdp.Flags)
	// XDP_ATTACHED and XDP_PROG_ID can only be returned by the kernel,
	// not sent to it, so we don't encode them.
	// source: https://elixir.bootlin.com/linux/v5.10.15/source/net/core/rtnetlink.c#L2894
	return nil
}
572