/*
 * Copyright (c)2013-2020 ZeroTier, Inc.
 *
 * Use of this software is governed by the Business Source License included
 * in the LICENSE.TXT file in the project's root directory.
 *
 * Change Date: 2025-01-01
 *
 * On the date above, in accordance with the Business Source License, use
 * of this software will be governed by version 2.0 of the Apache License.
 */
/****/

#ifndef ZT_N_SWITCH_HPP
#define ZT_N_SWITCH_HPP

#include <map>
#include <set>
#include <vector>
#include <list>

#include "Constants.hpp"
#include "Mutex.hpp"
#include "MAC.hpp"
#include "Packet.hpp"
#include "Utils.hpp"
#include "InetAddress.hpp"
#include "Topology.hpp"
#include "Network.hpp"
#include "SharedPtr.hpp"
#include "IncomingPacket.hpp"
#include "Hashtable.hpp"

/* Ethernet frame types that might be relevant to us */
#define ZT_ETHERTYPE_IPV4 0x0800
#define ZT_ETHERTYPE_ARP 0x0806
#define ZT_ETHERTYPE_RARP 0x8035
#define ZT_ETHERTYPE_ATALK 0x809b
#define ZT_ETHERTYPE_AARP 0x80f3
#define ZT_ETHERTYPE_IPX_A 0x8137
#define ZT_ETHERTYPE_IPX_B 0x8138
#define ZT_ETHERTYPE_IPV6 0x86dd

namespace ZeroTier {

class RuntimeEnvironment;
class Peer;

/**
 * Core of the distributed Ethernet switch and protocol implementation
 *
 * This class is perhaps a bit misnamed, but it's basically where everything
 * meets. Transport-layer ZT packets come in here, as do virtual network
 * packets from tap devices, and this sends them where they need to go and
 * wraps/unwraps accordingly. It also handles queues and timeouts and such.
56 */ 57 class Switch 58 { 59 struct ManagedQueue; 60 struct TXQueueEntry; 61 62 friend class SharedPtr<Peer>; 63 64 typedef struct { 65 TXQueueEntry *p; 66 bool ok_to_drop; 67 } dqr; 68 69 public: 70 Switch(const RuntimeEnvironment *renv); 71 72 /** 73 * Called when a packet is received from the real network 74 * 75 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call 76 * @param localSocket Local I/O socket as supplied by external code 77 * @param fromAddr Internet IP address of origin 78 * @param data Packet data 79 * @param len Packet length 80 */ 81 void onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddress &fromAddr,const void *data,unsigned int len); 82 83 /** 84 * Returns whether our bonding or balancing policy is aware of flows. 85 */ 86 bool isFlowAware(); 87 88 /** 89 * Called when a packet comes from a local Ethernet tap 90 * 91 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call 92 * @param network Which network's TAP did this packet come from? 93 * @param from Originating MAC address 94 * @param to Destination MAC address 95 * @param etherType Ethernet packet type 96 * @param vlanId VLAN ID or 0 if none 97 * @param data Ethernet payload 98 * @param len Frame length 99 */ 100 void onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const MAC &from,const MAC &to,unsigned int etherType,unsigned int vlanId,const void *data,unsigned int len); 101 102 /** 103 * Determines the next drop schedule for packets in the TX queue 104 * 105 * @param t Current time 106 * @param count Number of packets dropped this round 107 */ 108 uint64_t control_law(uint64_t t, int count); 109 110 /** 111 * Selects a packet eligible for transmission from a TX queue. According to the control law, multiple packets 112 * may be intentionally dropped before a packet is returned to the AQM scheduler. 
113 * 114 * @param q The TX queue that is being dequeued from 115 * @param now Current time 116 */ 117 dqr dodequeue(ManagedQueue *q, uint64_t now); 118 119 /** 120 * Presents a packet to the AQM scheduler. 121 * 122 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call 123 * @param network Network that the packet shall be sent over 124 * @param packet Packet to be sent 125 * @param encrypt Encrypt packet payload? (always true except for HELLO) 126 * @param qosBucket Which bucket the rule-system determined this packet should fall into 127 */ 128 void aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket,int32_t flowId = ZT_QOS_NO_FLOW); 129 130 /** 131 * Performs a single AQM cycle and dequeues and transmits all eligible packets on all networks 132 * 133 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call 134 */ 135 void aqm_dequeue(void *tPtr); 136 137 /** 138 * Calls the dequeue mechanism and adjust queue state variables 139 * 140 * @param q The TX queue that is being dequeued from 141 * @param isNew Whether or not this queue is in the NEW list 142 * @param now Current time 143 */ 144 Switch::TXQueueEntry * CoDelDequeue(ManagedQueue *q, bool isNew, uint64_t now); 145 146 /** 147 * Removes QoS Queues and flow state variables for a specific network. These queues are created 148 * automatically upon the transmission of the first packet from this peer to another peer on the 149 * given network. 150 * 151 * The reason for existence of queues and flow state variables specific to each network is so that 152 * each network's QoS rules function independently. 153 * 154 * @param nwid Network ID 155 */ 156 void removeNetworkQoSControlBlock(uint64_t nwid); 157 158 /** 159 * Send a packet to a ZeroTier address (destination in packet) 160 * 161 * The packet must be fully composed with source and destination but not 162 * yet encrypted. 
If the destination peer is known the packet 163 * is sent immediately. Otherwise it is queued and a WHOIS is dispatched. 164 * 165 * The packet may be compressed. Compression isn't done here. 166 * 167 * Needless to say, the packet's source must be this node. Otherwise it 168 * won't be encrypted right. (This is not used for relaying.) 169 * 170 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call 171 * @param packet Packet to send (buffer may be modified) 172 * @param encrypt Encrypt packet payload? (always true except for HELLO) 173 */ 174 void send(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW); 175 176 /** 177 * Request WHOIS on a given address 178 * 179 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call 180 * @param now Current time 181 * @param addr Address to look up 182 */ 183 void requestWhois(void *tPtr,const int64_t now,const Address &addr); 184 185 /** 186 * Run any processes that are waiting for this peer's identity 187 * 188 * Called when we learn of a peer's identity from HELLO, OK(WHOIS), etc. 189 * 190 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call 191 * @param peer New peer 192 */ 193 void doAnythingWaitingForPeer(void *tPtr,const SharedPtr<Peer> &peer); 194 195 /** 196 * Perform retries and other periodic timer tasks 197 * 198 * This can return a very long delay if there are no pending timer 199 * tasks. The caller should cap this comparatively vs. other values. 
200 * 201 * @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call 202 * @param now Current time 203 * @return Number of milliseconds until doTimerTasks() should be run again 204 */ 205 unsigned long doTimerTasks(void *tPtr,int64_t now); 206 207 private: 208 bool _shouldUnite(const int64_t now,const Address &source,const Address &destination); 209 bool _trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW); // packet is modified if return is true 210 void _sendViaSpecificPath(void *tPtr,SharedPtr<Peer> peer,SharedPtr<Path> viaPath,int64_t now,Packet &packet,bool encrypt,int32_t flowId); 211 212 const RuntimeEnvironment *const RR; 213 int64_t _lastBeaconResponse; 214 volatile int64_t _lastCheckedQueues; 215 216 // Time we last sent a WHOIS request for each address 217 Hashtable< Address,int64_t > _lastSentWhoisRequest; 218 Mutex _lastSentWhoisRequest_m; 219 220 // Packets waiting for WHOIS replies or other decode info or missing fragments 221 struct RXQueueEntry 222 { RXQueueEntryZeroTier::Switch::RXQueueEntry223 RXQueueEntry() : timestamp(0) {} 224 volatile int64_t timestamp; // 0 if entry is not in use 225 volatile uint64_t packetId; 226 IncomingPacket frag0; // head of packet 227 Packet::Fragment frags[ZT_MAX_PACKET_FRAGMENTS - 1]; // later fragments (if any) 228 unsigned int totalFragments; // 0 if only frag0 received, waiting for frags 229 uint32_t haveFragments; // bit mask, LSB to MSB 230 volatile bool complete; // if true, packet is complete 231 volatile int32_t flowId; 232 Mutex lock; 233 }; 234 RXQueueEntry _rxQueue[ZT_RX_QUEUE_SIZE]; 235 AtomicCounter _rxQueuePtr; 236 237 // Returns matching or next available RX queue entry _findRXQueueEntry(uint64_t packetId)238 inline RXQueueEntry *_findRXQueueEntry(uint64_t packetId) 239 { 240 const unsigned int current = static_cast<unsigned int>(_rxQueuePtr.load()); 241 for(unsigned int k=1;k<=ZT_RX_QUEUE_SIZE;++k) { 242 RXQueueEntry *rq = 
&(_rxQueue[(current - k) % ZT_RX_QUEUE_SIZE]); 243 if ((rq->packetId == packetId)&&(rq->timestamp)) 244 return rq; 245 } 246 ++_rxQueuePtr; 247 return &(_rxQueue[static_cast<unsigned int>(current) % ZT_RX_QUEUE_SIZE]); 248 } 249 250 // Returns current entry in rx queue ring buffer and increments ring pointer _nextRXQueueEntry()251 inline RXQueueEntry *_nextRXQueueEntry() 252 { 253 return &(_rxQueue[static_cast<unsigned int>((++_rxQueuePtr) - 1) % ZT_RX_QUEUE_SIZE]); 254 } 255 256 // ZeroTier-layer TX queue entry 257 struct TXQueueEntry 258 { TXQueueEntryZeroTier::Switch::TXQueueEntry259 TXQueueEntry() {} TXQueueEntryZeroTier::Switch::TXQueueEntry260 TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc,int32_t fid) : 261 dest(d), 262 creationTime(ct), 263 packet(p), 264 encrypt(enc), 265 flowId(fid) {} 266 267 Address dest; 268 uint64_t creationTime; 269 Packet packet; // unencrypted/unMAC'd packet -- this is done at send time 270 bool encrypt; 271 int32_t flowId; 272 }; 273 std::list< TXQueueEntry > _txQueue; 274 Mutex _txQueue_m; 275 Mutex _aqm_m; 276 277 // Tracks sending of VERB_RENDEZVOUS to relaying peers 278 struct _LastUniteKey 279 { _LastUniteKeyZeroTier::Switch::_LastUniteKey280 _LastUniteKey() : x(0),y(0) {} _LastUniteKeyZeroTier::Switch::_LastUniteKey281 _LastUniteKey(const Address &a1,const Address &a2) 282 { 283 if (a1 > a2) { 284 x = a2.toInt(); 285 y = a1.toInt(); 286 } else { 287 x = a1.toInt(); 288 y = a2.toInt(); 289 } 290 } hashCodeZeroTier::Switch::_LastUniteKey291 inline unsigned long hashCode() const { return ((unsigned long)x ^ (unsigned long)y); } operator ==ZeroTier::Switch::_LastUniteKey292 inline bool operator==(const _LastUniteKey &k) const { return ((x == k.x)&&(y == k.y)); } 293 uint64_t x,y; 294 }; 295 Hashtable< _LastUniteKey,uint64_t > _lastUniteAttempt; // key is always sorted in ascending order, for set-like behavior 296 Mutex _lastUniteAttempt_m; 297 298 // Queue with additional flow state variables 299 struct ManagedQueue 
300 { ManagedQueueZeroTier::Switch::ManagedQueue301 ManagedQueue(int id) : 302 id(id), 303 byteCredit(ZT_AQM_QUANTUM), 304 byteLength(0), 305 dropping(false) 306 {} 307 int id; 308 int byteCredit; 309 int byteLength; 310 uint64_t first_above_time; 311 uint32_t count; 312 uint64_t drop_next; 313 bool dropping; 314 uint64_t drop_next_time; 315 std::list< TXQueueEntry *> q; 316 }; 317 // To implement fq_codel we need to maintain a queue of queues 318 struct NetworkQoSControlBlock 319 { 320 int _currEnqueuedPackets; 321 std::vector<ManagedQueue *> newQueues; 322 std::vector<ManagedQueue *> oldQueues; 323 std::vector<ManagedQueue *> inactiveQueues; 324 }; 325 std::map<uint64_t,NetworkQoSControlBlock*> _netQueueControlBlock; 326 }; 327 328 } // namespace ZeroTier 329 330 #endif 331