1 /*
2  * Copyright (c)2013-2020 ZeroTier, Inc.
3  *
4  * Use of this software is governed by the Business Source License included
5  * in the LICENSE.TXT file in the project's root directory.
6  *
7  * Change Date: 2025-01-01
8  *
9  * On the date above, in accordance with the Business Source License, use
10  * of this software will be governed by version 2.0 of the Apache License.
11  */
12 /****/
13 
14 #ifndef ZT_N_SWITCH_HPP
15 #define ZT_N_SWITCH_HPP
16 
17 #include <map>
18 #include <set>
19 #include <vector>
20 #include <list>
21 
22 #include "Constants.hpp"
23 #include "Mutex.hpp"
24 #include "MAC.hpp"
25 #include "Packet.hpp"
26 #include "Utils.hpp"
27 #include "InetAddress.hpp"
28 #include "Topology.hpp"
29 #include "Network.hpp"
30 #include "SharedPtr.hpp"
31 #include "IncomingPacket.hpp"
32 #include "Hashtable.hpp"
33 
/*
 * Ethernet frame types that might be relevant to us
 *
 * Each value is the 16-bit EtherType carried in an Ethernet frame header,
 * identifying the protocol of the frame's payload.
 */
#define ZT_ETHERTYPE_IPV4 0x0800 /* Internet Protocol version 4 */
#define ZT_ETHERTYPE_ARP 0x0806 /* Address Resolution Protocol */
#define ZT_ETHERTYPE_RARP 0x8035 /* Reverse Address Resolution Protocol */
#define ZT_ETHERTYPE_ATALK 0x809b /* AppleTalk */
#define ZT_ETHERTYPE_AARP 0x80f3 /* AppleTalk Address Resolution Protocol */
#define ZT_ETHERTYPE_IPX_A 0x8137 /* IPX (first of the two IPX values) */
#define ZT_ETHERTYPE_IPX_B 0x8138 /* IPX (second of the two IPX values) */
#define ZT_ETHERTYPE_IPV6 0x86dd /* Internet Protocol version 6 */
43 
44 namespace ZeroTier {
45 
46 class RuntimeEnvironment;
47 class Peer;
48 
49 /**
50  * Core of the distributed Ethernet switch and protocol implementation
51  *
52  * This class is perhaps a bit misnamed, but it's basically where everything
53  * meets. Transport-layer ZT packets come in here, as do virtual network
54  * packets from tap devices, and this sends them where they need to go and
55  * wraps/unwraps accordingly. It also handles queues and timeouts and such.
56  */
57 class Switch
58 {
59 	struct ManagedQueue;
60 	struct TXQueueEntry;
61 
62 	friend class SharedPtr<Peer>;
63 
64 	typedef struct {
65 		TXQueueEntry *p;
66 		bool ok_to_drop;
67 	} dqr;
68 
69 public:
70 	Switch(const RuntimeEnvironment *renv);
71 
72 	/**
73 	 * Called when a packet is received from the real network
74 	 *
75 	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
76 	 * @param localSocket Local I/O socket as supplied by external code
77 	 * @param fromAddr Internet IP address of origin
78 	 * @param data Packet data
79 	 * @param len Packet length
80 	 */
81 	void onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddress &fromAddr,const void *data,unsigned int len);
82 
83 	/**
84 	 * Returns whether our bonding or balancing policy is aware of flows.
85 	 */
86 	bool isFlowAware();
87 
88 	/**
89 	 * Called when a packet comes from a local Ethernet tap
90 	 *
91 	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
92 	 * @param network Which network's TAP did this packet come from?
93 	 * @param from Originating MAC address
94 	 * @param to Destination MAC address
95 	 * @param etherType Ethernet packet type
96 	 * @param vlanId VLAN ID or 0 if none
97 	 * @param data Ethernet payload
98 	 * @param len Frame length
99 	 */
100 	void onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const MAC &from,const MAC &to,unsigned int etherType,unsigned int vlanId,const void *data,unsigned int len);
101 
102 	/**
103 	 * Determines the next drop schedule for packets in the TX queue
104 	 *
105 	 * @param t Current time
106 	 * @param count Number of packets dropped this round
107 	 */
108 	uint64_t control_law(uint64_t t, int count);
109 
110 	/**
111 	 * Selects a packet eligible for transmission from a TX queue. According to the control law, multiple packets
112 	 * may be intentionally dropped before a packet is returned to the AQM scheduler.
113 	 *
114 	 * @param q The TX queue that is being dequeued from
115 	 * @param now Current time
116 	 */
117 	dqr dodequeue(ManagedQueue *q, uint64_t now);
118 
119 	/**
120 	 * Presents a packet to the AQM scheduler.
121 	 *
122 	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
123 	 * @param network Network that the packet shall be sent over
124 	 * @param packet Packet to be sent
125 	 * @param encrypt Encrypt packet payload? (always true except for HELLO)
126 	 * @param qosBucket Which bucket the rule-system determined this packet should fall into
127 	 */
128 	void aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket,int32_t flowId = ZT_QOS_NO_FLOW);
129 
130 	/**
131 	 * Performs a single AQM cycle and dequeues and transmits all eligible packets on all networks
132 	 *
133 	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
134 	 */
135 	void aqm_dequeue(void *tPtr);
136 
137 	/**
138 	 * Calls the dequeue mechanism and adjust queue state variables
139 	 *
140 	 * @param q The TX queue that is being dequeued from
141 	 * @param isNew Whether or not this queue is in the NEW list
142 	 * @param now Current time
143 	 */
144 	Switch::TXQueueEntry * CoDelDequeue(ManagedQueue *q, bool isNew, uint64_t now);
145 
146 	/**
147 	 * Removes QoS Queues and flow state variables for a specific network. These queues are created
148 	 * automatically upon the transmission of the first packet from this peer to another peer on the
149 	 * given network.
150 	 *
151 	 * The reason for existence of queues and flow state variables specific to each network is so that
152 	 * each network's QoS rules function independently.
153 	 *
154 	 * @param nwid Network ID
155 	 */
156 	void removeNetworkQoSControlBlock(uint64_t nwid);
157 
158 	/**
159 	 * Send a packet to a ZeroTier address (destination in packet)
160 	 *
161 	 * The packet must be fully composed with source and destination but not
162 	 * yet encrypted. If the destination peer is known the packet
163 	 * is sent immediately. Otherwise it is queued and a WHOIS is dispatched.
164 	 *
165 	 * The packet may be compressed. Compression isn't done here.
166 	 *
167 	 * Needless to say, the packet's source must be this node. Otherwise it
168 	 * won't be encrypted right. (This is not used for relaying.)
169 	 *
170 	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
171 	 * @param packet Packet to send (buffer may be modified)
172 	 * @param encrypt Encrypt packet payload? (always true except for HELLO)
173 	 */
174 	void send(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW);
175 
176 	/**
177 	 * Request WHOIS on a given address
178 	 *
179 	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
180 	 * @param now Current time
181 	 * @param addr Address to look up
182 	 */
183 	void requestWhois(void *tPtr,const int64_t now,const Address &addr);
184 
185 	/**
186 	 * Run any processes that are waiting for this peer's identity
187 	 *
188 	 * Called when we learn of a peer's identity from HELLO, OK(WHOIS), etc.
189 	 *
190 	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
191 	 * @param peer New peer
192 	 */
193 	void doAnythingWaitingForPeer(void *tPtr,const SharedPtr<Peer> &peer);
194 
195 	/**
196 	 * Perform retries and other periodic timer tasks
197 	 *
198 	 * This can return a very long delay if there are no pending timer
199 	 * tasks. The caller should cap this comparatively vs. other values.
200 	 *
201 	 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
202 	 * @param now Current time
203 	 * @return Number of milliseconds until doTimerTasks() should be run again
204 	 */
205 	unsigned long doTimerTasks(void *tPtr,int64_t now);
206 
207 private:
208 	bool _shouldUnite(const int64_t now,const Address &source,const Address &destination);
209 	bool _trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW); // packet is modified if return is true
210 	void _sendViaSpecificPath(void *tPtr,SharedPtr<Peer> peer,SharedPtr<Path> viaPath,int64_t now,Packet &packet,bool encrypt,int32_t flowId);
211 
212 	const RuntimeEnvironment *const RR;
213 	int64_t _lastBeaconResponse;
214 	volatile int64_t _lastCheckedQueues;
215 
216 	// Time we last sent a WHOIS request for each address
217 	Hashtable< Address,int64_t > _lastSentWhoisRequest;
218 	Mutex _lastSentWhoisRequest_m;
219 
220 	// Packets waiting for WHOIS replies or other decode info or missing fragments
221 	struct RXQueueEntry
222 	{
RXQueueEntryZeroTier::Switch::RXQueueEntry223 		RXQueueEntry() : timestamp(0) {}
224 		volatile int64_t timestamp; // 0 if entry is not in use
225 		volatile uint64_t packetId;
226 		IncomingPacket frag0; // head of packet
227 		Packet::Fragment frags[ZT_MAX_PACKET_FRAGMENTS - 1]; // later fragments (if any)
228 		unsigned int totalFragments; // 0 if only frag0 received, waiting for frags
229 		uint32_t haveFragments; // bit mask, LSB to MSB
230 		volatile bool complete; // if true, packet is complete
231 		volatile int32_t flowId;
232 		Mutex lock;
233 	};
234 	RXQueueEntry _rxQueue[ZT_RX_QUEUE_SIZE];
235 	AtomicCounter _rxQueuePtr;
236 
237 	// Returns matching or next available RX queue entry
_findRXQueueEntry(uint64_t packetId)238 	inline RXQueueEntry *_findRXQueueEntry(uint64_t packetId)
239 	{
240 		const unsigned int current = static_cast<unsigned int>(_rxQueuePtr.load());
241 		for(unsigned int k=1;k<=ZT_RX_QUEUE_SIZE;++k) {
242 			RXQueueEntry *rq = &(_rxQueue[(current - k) % ZT_RX_QUEUE_SIZE]);
243 			if ((rq->packetId == packetId)&&(rq->timestamp))
244 				return rq;
245 		}
246 		++_rxQueuePtr;
247 		return &(_rxQueue[static_cast<unsigned int>(current) % ZT_RX_QUEUE_SIZE]);
248 	}
249 
250 	// Returns current entry in rx queue ring buffer and increments ring pointer
_nextRXQueueEntry()251 	inline RXQueueEntry *_nextRXQueueEntry()
252 	{
253 		return &(_rxQueue[static_cast<unsigned int>((++_rxQueuePtr) - 1) % ZT_RX_QUEUE_SIZE]);
254 	}
255 
256 	// ZeroTier-layer TX queue entry
257 	struct TXQueueEntry
258 	{
TXQueueEntryZeroTier::Switch::TXQueueEntry259 		TXQueueEntry() {}
TXQueueEntryZeroTier::Switch::TXQueueEntry260 		TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc,int32_t fid) :
261 			dest(d),
262 			creationTime(ct),
263 			packet(p),
264 			encrypt(enc),
265 			flowId(fid) {}
266 
267 		Address dest;
268 		uint64_t creationTime;
269 		Packet packet; // unencrypted/unMAC'd packet -- this is done at send time
270 		bool encrypt;
271 		int32_t flowId;
272 	};
273 	std::list< TXQueueEntry > _txQueue;
274 	Mutex _txQueue_m;
275 	Mutex _aqm_m;
276 
277 	// Tracks sending of VERB_RENDEZVOUS to relaying peers
278 	struct _LastUniteKey
279 	{
_LastUniteKeyZeroTier::Switch::_LastUniteKey280 		_LastUniteKey() : x(0),y(0) {}
_LastUniteKeyZeroTier::Switch::_LastUniteKey281 		_LastUniteKey(const Address &a1,const Address &a2)
282 		{
283 			if (a1 > a2) {
284 				x = a2.toInt();
285 				y = a1.toInt();
286 			} else {
287 				x = a1.toInt();
288 				y = a2.toInt();
289 			}
290 		}
hashCodeZeroTier::Switch::_LastUniteKey291 		inline unsigned long hashCode() const { return ((unsigned long)x ^ (unsigned long)y); }
operator ==ZeroTier::Switch::_LastUniteKey292 		inline bool operator==(const _LastUniteKey &k) const { return ((x == k.x)&&(y == k.y)); }
293 		uint64_t x,y;
294 	};
295 	Hashtable< _LastUniteKey,uint64_t > _lastUniteAttempt; // key is always sorted in ascending order, for set-like behavior
296 	Mutex _lastUniteAttempt_m;
297 
298 	// Queue with additional flow state variables
299 	struct ManagedQueue
300 	{
ManagedQueueZeroTier::Switch::ManagedQueue301 		ManagedQueue(int id) :
302 			id(id),
303 			byteCredit(ZT_AQM_QUANTUM),
304 			byteLength(0),
305 			dropping(false)
306 		{}
307 		int id;
308 		int byteCredit;
309 		int byteLength;
310 		uint64_t first_above_time;
311 		uint32_t count;
312 		uint64_t drop_next;
313 		bool dropping;
314 		uint64_t drop_next_time;
315 		std::list< TXQueueEntry *> q;
316 	};
317 	// To implement fq_codel we need to maintain a queue of queues
318 	struct NetworkQoSControlBlock
319 	{
320 		int _currEnqueuedPackets;
321 		std::vector<ManagedQueue *> newQueues;
322 		std::vector<ManagedQueue *> oldQueues;
323 		std::vector<ManagedQueue *> inactiveQueues;
324 	};
325 	std::map<uint64_t,NetworkQoSControlBlock*> _netQueueControlBlock;
326 };
327 
328 } // namespace ZeroTier
329 
330 #endif
331