1 /*
2  * Copyright (c)2019 ZeroTier, Inc.
3  *
4  * Use of this software is governed by the Business Source License included
5  * in the LICENSE.TXT file in the project's root directory.
6  *
7  * Change Date: 2025-01-01
8  *
9  * On the date above, in accordance with the Business Source License, use
10  * of this software will be governed by version 2.0 of the Apache License.
11  */
12 /****/
13 
14 #include <algorithm>
15 
16 #include "Constants.hpp"
17 #include "RuntimeEnvironment.hpp"
18 #include "Multicaster.hpp"
19 #include "Topology.hpp"
20 #include "Switch.hpp"
21 #include "Packet.hpp"
22 #include "Peer.hpp"
23 #include "C25519.hpp"
24 #include "CertificateOfMembership.hpp"
25 #include "Node.hpp"
26 #include "Network.hpp"
27 
28 namespace ZeroTier {
29 
Multicaster(const RuntimeEnvironment * renv)30 Multicaster::Multicaster(const RuntimeEnvironment *renv) :
31 	RR(renv),
32 	_groups(32)
33 {
34 }
35 
~Multicaster()36 Multicaster::~Multicaster()
37 {
38 }
39 
addMultiple(void * tPtr,int64_t now,uint64_t nwid,const MulticastGroup & mg,const void * addresses,unsigned int count,unsigned int totalKnown)40 void Multicaster::addMultiple(void *tPtr,int64_t now,uint64_t nwid,const MulticastGroup &mg,const void *addresses,unsigned int count,unsigned int totalKnown)
41 {
42 	const unsigned char *p = (const unsigned char *)addresses;
43 	const unsigned char *e = p + (5 * count);
44 	Mutex::Lock _l(_groups_m);
45 	MulticastGroupStatus &gs = _groups[Multicaster::Key(nwid,mg)];
46 	while (p != e) {
47 		_add(tPtr,now,nwid,mg,gs,Address(p,5));
48 		p += 5;
49 	}
50 }
51 
remove(uint64_t nwid,const MulticastGroup & mg,const Address & member)52 void Multicaster::remove(uint64_t nwid,const MulticastGroup &mg,const Address &member)
53 {
54 	Mutex::Lock _l(_groups_m);
55 	MulticastGroupStatus *s = _groups.get(Multicaster::Key(nwid,mg));
56 	if (s) {
57 		for(std::vector<MulticastGroupMember>::iterator m(s->members.begin());m!=s->members.end();++m) {
58 			if (m->address == member) {
59 				s->members.erase(m);
60 				break;
61 			}
62 		}
63 	}
64 }
65 
gather(const Address & queryingPeer,uint64_t nwid,const MulticastGroup & mg,Buffer<ZT_PROTO_MAX_PACKET_LENGTH> & appendTo,unsigned int limit) const66 unsigned int Multicaster::gather(const Address &queryingPeer,uint64_t nwid,const MulticastGroup &mg,Buffer<ZT_PROTO_MAX_PACKET_LENGTH> &appendTo,unsigned int limit) const
67 {
68 	unsigned char *p;
69 	unsigned int added = 0,i,k,rptr,totalKnown = 0;
70 	uint64_t a,picked[(ZT_PROTO_MAX_PACKET_LENGTH / 5) + 2];
71 
72 	if (!limit)
73 		return 0;
74 	else if (limit > 0xffff)
75 		limit = 0xffff;
76 
77 	const unsigned int totalAt = appendTo.size();
78 	appendTo.addSize(4); // sizeof(uint32_t)
79 	const unsigned int addedAt = appendTo.size();
80 	appendTo.addSize(2); // sizeof(uint16_t)
81 
82 	{ // Return myself if I am a member of this group
83 		SharedPtr<Network> network(RR->node->network(nwid));
84 		if ((network)&&(network->subscribedToMulticastGroup(mg,true))) {
85 			RR->identity.address().appendTo(appendTo);
86 			++totalKnown;
87 			++added;
88 		}
89 	}
90 
91 	Mutex::Lock _l(_groups_m);
92 
93 	const MulticastGroupStatus *s = _groups.get(Multicaster::Key(nwid,mg));
94 	if ((s)&&(!s->members.empty())) {
95 		totalKnown += (unsigned int)s->members.size();
96 
97 		// Members are returned in random order so that repeated gather queries
98 		// will return different subsets of a large multicast group.
99 		k = 0;
100 		while ((added < limit)&&(k < s->members.size())&&((appendTo.size() + ZT_ADDRESS_LENGTH) <= ZT_PROTO_MAX_PACKET_LENGTH)) {
101 			rptr = (unsigned int)RR->node->prng();
102 
103 restart_member_scan:
104 			a = s->members[rptr % (unsigned int)s->members.size()].address.toInt();
105 			for(i=0;i<k;++i) {
106 				if (picked[i] == a) {
107 					++rptr;
108 					goto restart_member_scan;
109 				}
110 			}
111 			picked[k++] = a;
112 
113 			if (queryingPeer.toInt() != a) { // do not return the peer that is making the request as a result
114 				p = (unsigned char *)appendTo.appendField(ZT_ADDRESS_LENGTH);
115 				*(p++) = (unsigned char)((a >> 32) & 0xff);
116 				*(p++) = (unsigned char)((a >> 24) & 0xff);
117 				*(p++) = (unsigned char)((a >> 16) & 0xff);
118 				*(p++) = (unsigned char)((a >> 8) & 0xff);
119 				*p = (unsigned char)(a & 0xff);
120 				++added;
121 			}
122 		}
123 	}
124 
125 	appendTo.setAt(totalAt,(uint32_t)totalKnown);
126 	appendTo.setAt(addedAt,(uint16_t)added);
127 
128 	return added;
129 }
130 
getMembers(uint64_t nwid,const MulticastGroup & mg,unsigned int limit) const131 std::vector<Address> Multicaster::getMembers(uint64_t nwid,const MulticastGroup &mg,unsigned int limit) const
132 {
133 	std::vector<Address> ls;
134 	Mutex::Lock _l(_groups_m);
135 	const MulticastGroupStatus *s = _groups.get(Multicaster::Key(nwid,mg));
136 	if (!s)
137 		return ls;
138 	for(std::vector<MulticastGroupMember>::const_reverse_iterator m(s->members.rbegin());m!=s->members.rend();++m) {
139 		ls.push_back(m->address);
140 		if (ls.size() >= limit)
141 			break;
142 	}
143 	return ls;
144 }
145 
send(void * tPtr,int64_t now,const SharedPtr<Network> & network,const Address & origin,const MulticastGroup & mg,const MAC & src,unsigned int etherType,const void * data,unsigned int len)146 void Multicaster::send(
147 	void *tPtr,
148 	int64_t now,
149 	const SharedPtr<Network> &network,
150 	const Address &origin,
151 	const MulticastGroup &mg,
152 	const MAC &src,
153 	unsigned int etherType,
154 	const void *data,
155 	unsigned int len)
156 {
157 	unsigned long idxbuf[4096];
158 	unsigned long *indexes = idxbuf;
159 
160 	// If we're in hub-and-spoke designated multicast replication mode, see if we
161 	// have a multicast replicator active. If so, pick the best and send it
162 	// there. If we are a multicast replicator or if none are alive, fall back
163 	// to sender replication. Note that bridges do not do this since this would
164 	// break bridge route learning. This is sort of an edge case limitation of
165 	// the current protocol and could be fixed, but fixing it would add more
166 	// complexity than the fix is probably worth. Bridges are generally high
167 	// bandwidth nodes.
168 	if (!network->config().isActiveBridge(RR->identity.address())) {
169 		Address multicastReplicators[ZT_MAX_NETWORK_SPECIALISTS];
170 		const unsigned int multicastReplicatorCount = network->config().multicastReplicators(multicastReplicators);
171 		if (multicastReplicatorCount) {
172 			if (std::find(multicastReplicators,multicastReplicators + multicastReplicatorCount,RR->identity.address()) == (multicastReplicators + multicastReplicatorCount)) {
173 				SharedPtr<Peer> bestMulticastReplicator;
174 				SharedPtr<Path> bestMulticastReplicatorPath;
175 				unsigned int bestMulticastReplicatorLatency = 0xffff;
176 				for(unsigned int i=0;i<multicastReplicatorCount;++i) {
177 					const SharedPtr<Peer> p(RR->topology->getPeerNoCache(multicastReplicators[i]));
178 					if ((p)&&(p->isAlive(now))) {
179 						const SharedPtr<Path> pp(p->getAppropriatePath(now,false));
180 						if ((pp)&&(pp->latency() < bestMulticastReplicatorLatency)) {
181 							bestMulticastReplicatorLatency = pp->latency();
182 							bestMulticastReplicatorPath = pp;
183 							bestMulticastReplicator = p;
184 						}
185 					}
186 				}
187 				if (bestMulticastReplicator) {
188 					Packet outp(bestMulticastReplicator->address(),RR->identity.address(),Packet::VERB_MULTICAST_FRAME);
189 					outp.append((uint64_t)network->id());
190 					outp.append((uint8_t)0x0c); // includes source MAC | please replicate
191 					((src) ? src : MAC(RR->identity.address(),network->id())).appendTo(outp);
192 					mg.mac().appendTo(outp);
193 					outp.append((uint32_t)mg.adi());
194 					outp.append((uint16_t)etherType);
195 					outp.append(data,len);
196 					if (!network->config().disableCompression()) outp.compress();
197 					outp.armor(bestMulticastReplicator->key(),true,bestMulticastReplicator->aesKeysIfSupported());
198 					bestMulticastReplicatorPath->send(RR,tPtr,outp.data(),outp.size(),now);
199 					return;
200 				}
201 			}
202 		}
203 	}
204 
205 	try {
206 		Mutex::Lock _l(_groups_m);
207 		MulticastGroupStatus &gs = _groups[Multicaster::Key(network->id(),mg)];
208 
209 		if (!gs.members.empty()) {
210 			// Allocate a memory buffer if group is monstrous
211 			if (gs.members.size() > (sizeof(idxbuf) / sizeof(unsigned long)))
212 				indexes = new unsigned long[gs.members.size()];
213 
214 			// Generate a random permutation of member indexes
215 			for(unsigned long i=0;i<gs.members.size();++i)
216 				indexes[i] = i;
217 			for(unsigned long i=(unsigned long)gs.members.size()-1;i>0;--i) {
218 				unsigned long j = (unsigned long)RR->node->prng() % (i + 1);
219 				unsigned long tmp = indexes[j];
220 				indexes[j] = indexes[i];
221 				indexes[i] = tmp;
222 			}
223 		}
224 
225 		Address activeBridges[ZT_MAX_NETWORK_SPECIALISTS];
226 		const unsigned int activeBridgeCount = network->config().activeBridges(activeBridges);
227 		const unsigned int limit = network->config().multicastLimit;
228 
229 		if (gs.members.size() >= limit) {
230 			// Skip queue if we already have enough members to complete the send operation
231 			OutboundMulticast out;
232 
233 			out.init(
234 				RR,
235 				now,
236 				network->id(),
237 				network->config().disableCompression(),
238 				limit,
239 				1, // we'll still gather a little from peers to keep multicast list fresh
240 				src,
241 				mg,
242 				etherType,
243 				data,
244 				len);
245 
246 			unsigned int count = 0;
247 
248 			for(unsigned int i=0;i<activeBridgeCount;++i) {
249 				if ((activeBridges[i] != RR->identity.address())&&(activeBridges[i] != origin)) {
250 					out.sendOnly(RR,tPtr,activeBridges[i]); // optimization: don't use dedup log if it's a one-pass send
251 					if (++count >= limit)
252 						break;
253 				}
254 			}
255 
256 			unsigned long idx = 0;
257 			while ((count < limit)&&(idx < gs.members.size())) {
258 				const Address ma(gs.members[indexes[idx++]].address);
259 				if ((std::find(activeBridges,activeBridges + activeBridgeCount,ma) == (activeBridges + activeBridgeCount))&&(ma != origin)) {
260 					out.sendOnly(RR,tPtr,ma); // optimization: don't use dedup log if it's a one-pass send
261 					++count;
262 				}
263 			}
264 		} else {
265 			while (gs.txQueue.size() >= ZT_TX_QUEUE_SIZE) {
266 				gs.txQueue.pop_front();
267 			}
268 
269 			const unsigned int gatherLimit = (limit - (unsigned int)gs.members.size()) + 1;
270 
271 			if ((gs.members.empty())||((now - gs.lastExplicitGather) >= ZT_MULTICAST_EXPLICIT_GATHER_DELAY)) {
272 				gs.lastExplicitGather = now;
273 
274 				Address explicitGatherPeers[16];
275 				unsigned int numExplicitGatherPeers = 0;
276 
277 				SharedPtr<Peer> bestRoot(RR->topology->getUpstreamPeer());
278 				if (bestRoot)
279 					explicitGatherPeers[numExplicitGatherPeers++] = bestRoot->address();
280 
281 				explicitGatherPeers[numExplicitGatherPeers++] = network->controller();
282 
283 				Address ac[ZT_MAX_NETWORK_SPECIALISTS];
284 				const unsigned int accnt = network->config().alwaysContactAddresses(ac);
285 				unsigned int shuffled[ZT_MAX_NETWORK_SPECIALISTS];
286 				for(unsigned int i=0;i<accnt;++i)
287 					shuffled[i] = i;
288 				for(unsigned int i=0,k=accnt>>1;i<k;++i) {
289 					const uint64_t x = RR->node->prng();
290 					const unsigned int x1 = shuffled[(unsigned int)x % accnt];
291 					const unsigned int x2 = shuffled[(unsigned int)(x >> 32) % accnt];
292 					const unsigned int tmp = shuffled[x1];
293 					shuffled[x1] = shuffled[x2];
294 					shuffled[x2] = tmp;
295 				}
296 				for(unsigned int i=0;i<accnt;++i) {
297 					explicitGatherPeers[numExplicitGatherPeers++] = ac[shuffled[i]];
298 					if (numExplicitGatherPeers == 16)
299 						break;
300 				}
301 
302 				std::vector<Address> anchors(network->config().anchors());
303 				for(std::vector<Address>::const_iterator a(anchors.begin());a!=anchors.end();++a) {
304 					if (*a != RR->identity.address()) {
305 						explicitGatherPeers[numExplicitGatherPeers++] = *a;
306 						if (numExplicitGatherPeers == 16)
307 							break;
308 					}
309 				}
310 
311 				for(unsigned int k=0;k<numExplicitGatherPeers;++k) {
312 					const CertificateOfMembership *com = (network) ? ((network->config().com) ? &(network->config().com) : (const CertificateOfMembership *)0) : (const CertificateOfMembership *)0;
313 					Packet outp(explicitGatherPeers[k],RR->identity.address(),Packet::VERB_MULTICAST_GATHER);
314 					outp.append(network->id());
315 					outp.append((uint8_t)((com) ? 0x01 : 0x00));
316 					mg.mac().appendTo(outp);
317 					outp.append((uint32_t)mg.adi());
318 					outp.append((uint32_t)gatherLimit);
319 					if (com)
320 						com->serialize(outp);
321 					RR->node->expectReplyTo(outp.packetId());
322 					RR->sw->send(tPtr,outp,true);
323 				}
324 			}
325 
326 			gs.txQueue.push_back(OutboundMulticast());
327 			OutboundMulticast &out = gs.txQueue.back();
328 
329 			out.init(
330 				RR,
331 				now,
332 				network->id(),
333 				network->config().disableCompression(),
334 				limit,
335 				gatherLimit,
336 				src,
337 				mg,
338 				etherType,
339 				data,
340 				len);
341 
342 			if (origin)
343 				out.logAsSent(origin);
344 
345 			unsigned int count = 0;
346 
347 			for(unsigned int i=0;i<activeBridgeCount;++i) {
348 				if (activeBridges[i] != RR->identity.address()) {
349 					out.sendAndLog(RR,tPtr,activeBridges[i]);
350 					if (++count >= limit)
351 						break;
352 				}
353 			}
354 
355 			unsigned long idx = 0;
356 			while ((count < limit)&&(idx < gs.members.size())) {
357 				Address ma(gs.members[indexes[idx++]].address);
358 				if (std::find(activeBridges,activeBridges + activeBridgeCount,ma) == (activeBridges + activeBridgeCount)) {
359 					out.sendAndLog(RR,tPtr,ma);
360 					++count;
361 				}
362 			}
363 		}
364 	} catch ( ... ) {} // this is a sanity check to catch any failures and make sure indexes[] still gets deleted
365 
366 	// Free allocated memory buffer if any
367 	if (indexes != idxbuf)
368 		delete [] indexes;
369 }
370 
clean(int64_t now)371 void Multicaster::clean(int64_t now)
372 {
373 	Mutex::Lock _l(_groups_m);
374 	Multicaster::Key *k = (Multicaster::Key *)0;
375 	MulticastGroupStatus *s = (MulticastGroupStatus *)0;
376 	Hashtable<Multicaster::Key,MulticastGroupStatus>::Iterator mm(_groups);
377 	while (mm.next(k,s)) {
378 		for(std::list<OutboundMulticast>::iterator tx(s->txQueue.begin());tx!=s->txQueue.end();) {
379 			if ((tx->expired(now))||(tx->atLimit()))
380 				s->txQueue.erase(tx++);
381 			else ++tx;
382 		}
383 
384 		unsigned long count = 0;
385 		{
386 			std::vector<MulticastGroupMember>::iterator reader(s->members.begin());
387 			std::vector<MulticastGroupMember>::iterator writer(reader);
388 			while (reader != s->members.end()) {
389 				if ((now - reader->timestamp) < ZT_MULTICAST_LIKE_EXPIRE) {
390 					*writer = *reader;
391 					++writer;
392 					++count;
393 				}
394 				++reader;
395 			}
396 		}
397 
398 		if (count) {
399 			s->members.resize(count);
400 		} else if (s->txQueue.empty()) {
401 			_groups.erase(*k);
402 		} else {
403 			s->members.clear();
404 		}
405 	}
406 }
407 
_add(void * tPtr,int64_t now,uint64_t nwid,const MulticastGroup & mg,MulticastGroupStatus & gs,const Address & member)408 void Multicaster::_add(void *tPtr,int64_t now,uint64_t nwid,const MulticastGroup &mg,MulticastGroupStatus &gs,const Address &member)
409 {
410 	// assumes _groups_m is locked
411 
412 	// Do not add self -- even if someone else returns it
413 	if (member == RR->identity.address())
414 		return;
415 
416 	std::vector<MulticastGroupMember>::iterator m(std::lower_bound(gs.members.begin(),gs.members.end(),member));
417 	if (m != gs.members.end()) {
418 		if (m->address == member) {
419 			m->timestamp = now;
420 			return;
421 		}
422 		gs.members.insert(m,MulticastGroupMember(member,now));
423 	} else {
424 		gs.members.push_back(MulticastGroupMember(member,now));
425 	}
426 
427 	for(std::list<OutboundMulticast>::iterator tx(gs.txQueue.begin());tx!=gs.txQueue.end();) {
428 		if (tx->atLimit())
429 			gs.txQueue.erase(tx++);
430 		else {
431 			tx->sendIfNew(RR,tPtr,member);
432 			if (tx->atLimit())
433 				gs.txQueue.erase(tx++);
434 			else ++tx;
435 		}
436 	}
437 }
438 
439 } // namespace ZeroTier
440