1package netlink
2
3import (
4	"bytes"
5	"encoding/binary"
6	"errors"
7	"fmt"
8	"net"
9
10	"github.com/vishvananda/netlink/nl"
11	"golang.org/x/sys/unix"
12)
13
// ConntrackTableType identifies the conntrack table a netlink operation is
// addressed to (see ConntrackTable and ConntrackExpectTable).
type ConntrackTableType uint8
16
// Table identifiers. The values mirror the nfnetlink subsystem ids declared in
// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h
const (
	// ConntrackTable is the conntrack table (NFNL_SUBSYS_CTNETLINK, 1).
	ConntrackTable = 1

	// ConntrackExpectTable is the conntrack expectation table (NFNL_SUBSYS_CTNETLINK_EXP, 2).
	ConntrackExpectTable = 2
)
25
// seekCurrent is the "relative to the current offset" whence value for Seek.
// It is spelled out here to stay compatible with golang 1.6, which does not
// have io.SeekCurrent.
const seekCurrent = 1
30
// InetFamily is the address family of the flows a conntrack operation applies
// to; it is sent as the family byte of the netfilter message header.
type InetFamily uint8
33
34//  -L [table] [options]          List conntrack or expectation table
35//  -G [table] parameters         Get conntrack or expectation
36
37//  -I [table] parameters         Create a conntrack or expectation
38//  -U [table] parameters         Update a conntrack
39//  -E [table] [options]          Show events
40
41//  -C [table]                    Show counter
42//  -S                            Show statistics
43
44// ConntrackTableList returns the flow list of a table of a specific family
45// conntrack -L [table] [options]          List conntrack or expectation table
46func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
47	return pkgHandle.ConntrackTableList(table, family)
48}
49
50// ConntrackTableFlush flushes all the flows of a specified table
51// conntrack -F [table]            Flush table
52// The flush operation applies to all the family types
53func ConntrackTableFlush(table ConntrackTableType) error {
54	return pkgHandle.ConntrackTableFlush(table)
55}
56
57// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter
58// conntrack -D [table] parameters         Delete conntrack or expectation
59func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) {
60	return pkgHandle.ConntrackDeleteFilter(table, family, filter)
61}
62
63// ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
64// conntrack -L [table] [options]          List conntrack or expectation table
65func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
66	res, err := h.dumpConntrackTable(table, family)
67	if err != nil {
68		return nil, err
69	}
70
71	// Deserialize all the flows
72	var result []*ConntrackFlow
73	for _, dataRaw := range res {
74		result = append(result, parseRawData(dataRaw))
75	}
76
77	return result, nil
78}
79
80// ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed
81// conntrack -F [table]            Flush table
82// The flush operation applies to all the family types
83func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
84	req := h.newConntrackRequest(table, unix.AF_INET, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK)
85	_, err := req.Execute(unix.NETLINK_NETFILTER, 0)
86	return err
87}
88
89// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed
90// conntrack -D [table] parameters         Delete conntrack or expectation
91func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) {
92	res, err := h.dumpConntrackTable(table, family)
93	if err != nil {
94		return 0, err
95	}
96
97	var matched uint
98	for _, dataRaw := range res {
99		flow := parseRawData(dataRaw)
100		if match := filter.MatchConntrackFlow(flow); match {
101			req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK)
102			// skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already
103			req2.AddRawData(dataRaw[4:])
104			req2.Execute(unix.NETLINK_NETFILTER, 0)
105			matched++
106		}
107	}
108
109	return matched, nil
110}
111
112func (h *Handle) newConntrackRequest(table ConntrackTableType, family InetFamily, operation, flags int) *nl.NetlinkRequest {
113	// Create the Netlink request object
114	req := h.newNetlinkRequest((int(table)<<8)|operation, flags)
115	// Add the netfilter header
116	msg := &nl.Nfgenmsg{
117		NfgenFamily: uint8(family),
118		Version:     nl.NFNETLINK_V0,
119		ResId:       0,
120	}
121	req.AddData(msg)
122	return req
123}
124
125func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) ([][]byte, error) {
126	req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, unix.NLM_F_DUMP)
127	return req.Execute(unix.NETLINK_NETFILTER, 0)
128}
129
// ipTuple holds the base information of one direction of a flow.
//
// The full conntrack flow structure is very complicated and can be found in the file:
// http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h
// For the time being only the fields below are parsed and extracted.
type ipTuple struct {
	Bytes    uint64 // byte counter of this direction
	DstIP    net.IP // destination address
	DstPort  uint16 // destination port
	Packets  uint64 // packet counter of this direction
	Protocol uint8  // layer 4 protocol number
	SrcIP    net.IP // source address
	SrcPort  uint16 // source port
}
142
143type ConntrackFlow struct {
144	FamilyType uint8
145	Forward    ipTuple
146	Reverse    ipTuple
147	Mark       uint32
148}
149
150func (s *ConntrackFlow) String() string {
151	// conntrack cmd output:
152	// udp      17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0
153	return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=%d",
154		nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol,
155		s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes,
156		s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes,
157		s.Mark)
158}
159
160// This method parse the ip tuple structure
161// The message structure is the following:
162// <len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC], 16 bytes for the IP>
163// <len, [CTA_IP_V4_DST|CTA_IP_V6_DST], 16 bytes for the IP>
164// <len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO, 1 byte for the protocol, 3 bytes of padding>
165// <len, CTA_PROTO_SRC_PORT, 2 bytes for the source port, 2 bytes of padding>
166// <len, CTA_PROTO_DST_PORT, 2 bytes for the source port, 2 bytes of padding>
167func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 {
168	for i := 0; i < 2; i++ {
169		_, t, _, v := parseNfAttrTLV(reader)
170		switch t {
171		case nl.CTA_IP_V4_SRC, nl.CTA_IP_V6_SRC:
172			tpl.SrcIP = v
173		case nl.CTA_IP_V4_DST, nl.CTA_IP_V6_DST:
174			tpl.DstIP = v
175		}
176	}
177	// Skip the next 4 bytes  nl.NLA_F_NESTED|nl.CTA_TUPLE_PROTO
178	reader.Seek(4, seekCurrent)
179	_, t, _, v := parseNfAttrTLV(reader)
180	if t == nl.CTA_PROTO_NUM {
181		tpl.Protocol = uint8(v[0])
182	}
183	// Skip some padding 3 bytes
184	reader.Seek(3, seekCurrent)
185	for i := 0; i < 2; i++ {
186		_, t, _ := parseNfAttrTL(reader)
187		switch t {
188		case nl.CTA_PROTO_SRC_PORT:
189			parseBERaw16(reader, &tpl.SrcPort)
190		case nl.CTA_PROTO_DST_PORT:
191			parseBERaw16(reader, &tpl.DstPort)
192		}
193		// Skip some padding 2 byte
194		reader.Seek(2, seekCurrent)
195	}
196	return tpl.Protocol
197}
198
199func parseNfAttrTLV(r *bytes.Reader) (isNested bool, attrType, len uint16, value []byte) {
200	isNested, attrType, len = parseNfAttrTL(r)
201
202	value = make([]byte, len)
203	binary.Read(r, binary.BigEndian, &value)
204	return isNested, attrType, len, value
205}
206
207func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) {
208	binary.Read(r, nl.NativeEndian(), &len)
209	len -= nl.SizeofNfattr
210
211	binary.Read(r, nl.NativeEndian(), &attrType)
212	isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED
213	attrType = attrType & (nl.NLA_F_NESTED - 1)
214
215	return isNested, attrType, len
216}
217
// parseBERaw16 decodes the next 2 bytes of r as a big endian uint16 into v.
// When fewer than 2 bytes are left they are consumed and v is not modified.
func parseBERaw16(r *bytes.Reader, v *uint16) {
	var raw [2]byte
	if n, _ := r.Read(raw[:]); n == len(raw) {
		*v = binary.BigEndian.Uint16(raw[:])
	}
}
221
// parseBERaw32 decodes the next 4 bytes of r as a big endian uint32 into v.
// When fewer than 4 bytes are left they are consumed and v is not modified.
func parseBERaw32(r *bytes.Reader, v *uint32) {
	var raw [4]byte
	if n, _ := r.Read(raw[:]); n == len(raw) {
		*v = binary.BigEndian.Uint32(raw[:])
	}
}
225
// parseBERaw64 decodes the next 8 bytes of r as a big endian uint64 into v.
// When fewer than 8 bytes are left they are consumed and v is not modified.
func parseBERaw64(r *bytes.Reader, v *uint64) {
	var raw [8]byte
	if n, _ := r.Read(raw[:]); n == len(raw) {
		*v = binary.BigEndian.Uint64(raw[:])
	}
}
229
230func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) {
231	for i := 0; i < 2; i++ {
232		switch _, t, _ := parseNfAttrTL(r); t {
233		case nl.CTA_COUNTERS_BYTES:
234			parseBERaw64(r, &bytes)
235		case nl.CTA_COUNTERS_PACKETS:
236			parseBERaw64(r, &packets)
237		default:
238			return
239		}
240	}
241	return
242}
243
// parseConnectionMark reads the connection mark, a big endian uint32, from r.
// It returns 0 when fewer than 4 bytes are left.
func parseConnectionMark(r *bytes.Reader) (mark uint32) {
	binary.Read(r, binary.BigEndian, &mark)
	return mark
}
248
249func parseRawData(data []byte) *ConntrackFlow {
250	s := &ConntrackFlow{}
251	// First there is the Nfgenmsg header
252	// consume only the family field
253	reader := bytes.NewReader(data)
254	binary.Read(reader, nl.NativeEndian(), &s.FamilyType)
255
256	// skip rest of the Netfilter header
257	reader.Seek(3, seekCurrent)
258	// The message structure is the following:
259	// <len, NLA_F_NESTED|CTA_TUPLE_ORIG> 4 bytes
260	// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
261	// flow information of the forward flow
262	// <len, NLA_F_NESTED|CTA_TUPLE_REPLY> 4 bytes
263	// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
264	// flow information of the reverse flow
265	for reader.Len() > 0 {
266		if nested, t, l := parseNfAttrTL(reader); nested {
267			switch t {
268			case nl.CTA_TUPLE_ORIG:
269				if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
270					parseIpTuple(reader, &s.Forward)
271				}
272			case nl.CTA_TUPLE_REPLY:
273				if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
274					parseIpTuple(reader, &s.Reverse)
275				} else {
276					// Header not recognized skip it
277					reader.Seek(int64(l), seekCurrent)
278				}
279			case nl.CTA_COUNTERS_ORIG:
280				s.Forward.Bytes, s.Forward.Packets = parseByteAndPacketCounters(reader)
281			case nl.CTA_COUNTERS_REPLY:
282				s.Reverse.Bytes, s.Reverse.Packets = parseByteAndPacketCounters(reader)
283			}
284		} else {
285			switch t {
286			case nl.CTA_MARK:
287				s.Mark = parseConnectionMark(reader)
288			}
289		}
290	}
291	return s
292}
293
294// Conntrack parameters and options:
295//   -n, --src-nat ip                      source NAT ip
296//   -g, --dst-nat ip                      destination NAT ip
297//   -j, --any-nat ip                      source or destination NAT ip
298//   -m, --mark mark                       Set mark
299//   -c, --secmark secmark                 Set selinux secmark
300//   -e, --event-mask eventmask            Event mask, eg. NEW,DESTROY
301//   -z, --zero                            Zero counters while listing
302//   -o, --output type[,...]               Output format, eg. xml
303//   -l, --label label[,...]               conntrack labels
304
305// Common parameters and options:
306//   -s, --src, --orig-src ip              Source address from original direction
307//   -d, --dst, --orig-dst ip              Destination address from original direction
308//   -r, --reply-src ip            Source address from reply direction
309//   -q, --reply-dst ip            Destination address from reply direction
310//   -p, --protonum proto          Layer 4 Protocol, eg. 'tcp'
311//   -f, --family proto            Layer 3 Protocol, eg. 'ipv6'
312//   -t, --timeout timeout         Set timeout
313//   -u, --status status           Set status, eg. ASSURED
314//   -w, --zone value              Set conntrack zone
315//   --orig-zone value             Set zone for original direction
316//   --reply-zone value            Set zone for reply direction
317//   -b, --buffer-size             Netlink socket buffer size
318//   --mask-src ip                 Source mask address
319//   --mask-dst ip                 Destination mask address
320
// ConntrackFilterType names the attribute of a flow that an entry of a
// ConntrackFilter is compared against.
type ConntrackFilterType uint8
323
// Filter attributes accepted by ConntrackFilter.AddIP.
const (
	// ConntrackOrigSrcIP is the source address from original direction (--orig-src ip).
	ConntrackOrigSrcIP = iota
	// ConntrackOrigDstIP is the destination address from original direction (--orig-dst ip).
	ConntrackOrigDstIP
	// ConntrackReplySrcIP is the source address from reply direction (--reply-src ip).
	ConntrackReplySrcIP
	// ConntrackReplyDstIP is the destination address from reply direction (--reply-dst ip).
	ConntrackReplyDstIP
	// ConntrackReplyAnyIP matches the source or the destination reply address.
	ConntrackReplyAnyIP

	// ConntrackNatSrcIP is the former name of ConntrackReplySrcIP.
	//
	// Deprecated: use ConntrackReplySrcIP instead.
	ConntrackNatSrcIP = ConntrackReplySrcIP
	// ConntrackNatDstIP is the former name of ConntrackReplyDstIP.
	//
	// Deprecated: use ConntrackReplyDstIP instead.
	ConntrackNatDstIP = ConntrackReplyDstIP
	// ConntrackNatAnyIP is the former name of ConntrackReplyAnyIP.
	//
	// Deprecated: use ConntrackReplyAnyIP instead.
	ConntrackNatAnyIP = ConntrackReplyAnyIP
)
334
335type CustomConntrackFilter interface {
336	// MatchConntrackFlow applies the filter to the flow and returns true if the flow matches
337	// the filter or false otherwise
338	MatchConntrackFlow(flow *ConntrackFlow) bool
339}
340
341type ConntrackFilter struct {
342	ipFilter map[ConntrackFilterType]net.IP
343}
344
345// AddIP adds an IP to the conntrack filter
346func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error {
347	if f.ipFilter == nil {
348		f.ipFilter = make(map[ConntrackFilterType]net.IP)
349	}
350	if _, ok := f.ipFilter[tp]; ok {
351		return errors.New("Filter attribute already present")
352	}
353	f.ipFilter[tp] = ip
354	return nil
355}
356
357// MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter
358// false otherwise
359func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
360	if len(f.ipFilter) == 0 {
361		// empty filter always not match
362		return false
363	}
364
365	match := true
366	// -orig-src ip   Source address from original direction
367	if elem, found := f.ipFilter[ConntrackOrigSrcIP]; found {
368		match = match && elem.Equal(flow.Forward.SrcIP)
369	}
370
371	// -orig-dst ip   Destination address from original direction
372	if elem, found := f.ipFilter[ConntrackOrigDstIP]; match && found {
373		match = match && elem.Equal(flow.Forward.DstIP)
374	}
375
376	// -src-nat ip    Source NAT ip
377	if elem, found := f.ipFilter[ConntrackReplySrcIP]; match && found {
378		match = match && elem.Equal(flow.Reverse.SrcIP)
379	}
380
381	// -dst-nat ip    Destination NAT ip
382	if elem, found := f.ipFilter[ConntrackReplyDstIP]; match && found {
383		match = match && elem.Equal(flow.Reverse.DstIP)
384	}
385
386	// Match source or destination reply IP
387	if elem, found := f.ipFilter[ConntrackReplyAnyIP]; match && found {
388		match = match && (elem.Equal(flow.Reverse.SrcIP) || elem.Equal(flow.Reverse.DstIP))
389	}
390
391	return match
392}
393
394var _ CustomConntrackFilter = (*ConntrackFilter)(nil)
395