xref: /openbsd/usr.sbin/traceroute/traceroute.c (revision b2b634ff)
1 /*	$OpenBSD: traceroute.c,v 1.170 2024/08/21 15:00:25 florian Exp $	*/
2 /*	$NetBSD: traceroute.c,v 1.10 1995/05/21 15:50:45 mycroft Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*-
34  * Copyright (c) 1990, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * Van Jacobson.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  */
64 
65 /*
66  * traceroute host  - trace the route ip packets follow going to "host".
67  *
68  * Attempt to trace the route an ip packet would follow to some
69  * internet host.  We find out intermediate hops by launching probe
70  * packets with a small ttl (time to live) then listening for an
71  * icmp "time exceeded" reply from a gateway.  We start our probes
72  * with a ttl of one and increase by one until we get an icmp "port
73  * unreachable" (which means we got to "host") or hit a max (which
74  * defaults to 64 hops & can be changed with the -m flag).  Three
75  * probes (change with -q flag) are sent at each ttl setting and a
76  * line is printed showing the ttl, address of the gateway and
77  * round trip time of each probe.  If the probe answers come from
78  * different gateways, the address of each responding system will
79  * be printed.  If there is no response within a 5 sec. timeout
80  * interval (changed with the -w flag), a "*" is printed for that
81  * probe.
82  *
83  * Probe packets are UDP format.  We don't want the destination
84  * host to process them so the destination port is set to an
85  * unlikely value (if some clod on the destination is using that
86  * value, it can be changed with the -p flag).
87  *
88  * A sample use might be:
89  *
90  *     [yak 71]% traceroute nis.nsf.net.
91  *     traceroute to nis.nsf.net (35.1.1.48), 64 hops max, 56 byte packet
92  *      1  helios.ee.lbl.gov (128.3.112.1)  19 ms  19 ms  0 ms
93  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
94  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
95  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  39 ms
96  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  39 ms  39 ms  39 ms
97  *      6  128.32.197.4 (128.32.197.4)  40 ms  59 ms  59 ms
98  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  59 ms
99  *      8  129.140.70.13 (129.140.70.13)  99 ms  99 ms  80 ms
100  *      9  129.140.71.6 (129.140.71.6)  139 ms  239 ms  319 ms
101  *     10  129.140.81.7 (129.140.81.7)  220 ms  199 ms  199 ms
102  *     11  nic.merit.edu (35.1.1.48)  239 ms  239 ms  239 ms
103  *
104  * Note that lines 2 & 3 are the same.  This is due to a buggy
105  * kernel on the 2nd hop system -- lbl-csam.arpa -- that forwards
106  * packets with a zero ttl.
107  *
108  * A more interesting example is:
109  *
110  *     [yak 72]% traceroute allspice.lcs.mit.edu.
111  *     traceroute to allspice.lcs.mit.edu (18.26.0.115), 64 hops max
112  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
113  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  19 ms  19 ms
114  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  19 ms
115  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  19 ms  39 ms  39 ms
116  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  20 ms  39 ms  39 ms
117  *      6  128.32.197.4 (128.32.197.4)  59 ms  119 ms  39 ms
118  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  39 ms
119  *      8  129.140.70.13 (129.140.70.13)  80 ms  79 ms  99 ms
120  *      9  129.140.71.6 (129.140.71.6)  139 ms  139 ms  159 ms
121  *     10  129.140.81.7 (129.140.81.7)  199 ms  180 ms  300 ms
122  *     11  129.140.72.17 (129.140.72.17)  300 ms  239 ms  239 ms
123  *     12  * * *
124  *     13  128.121.54.72 (128.121.54.72)  259 ms  499 ms  279 ms
125  *     14  * * *
126  *     15  * * *
127  *     16  * * *
128  *     17  * * *
129  *     18  ALLSPICE.LCS.MIT.EDU (18.26.0.115)  339 ms  279 ms  279 ms
130  *
131  * (I start to see why I'm having so much trouble with mail to
132  * MIT.)  Note that the gateways 12, 14, 15, 16 & 17 hops away
133  * either don't send ICMP "time exceeded" messages or send them
134  * with a ttl too small to reach us.  14 - 17 are running the
135  * MIT C Gateway code that doesn't send "time exceeded"s.  God
136  * only knows what's going on with 12.
137  *
138  * The silent gateway 12 in the above may be the result of a bug in
139  * the 4.[23]BSD network code (and its derivatives):  4.x (x <= 3)
140  * sends an unreachable message using whatever ttl remains in the
141  * original datagram.  Since, for gateways, the remaining ttl is
142  * zero, the icmp "time exceeded" is guaranteed to not make it back
143  * to us.  The behavior of this bug is slightly more interesting
144  * when it appears on the destination system:
145  *
146  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
147  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  39 ms
148  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  39 ms  19 ms
149  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  19 ms
150  *      5  ccn-nerif35.Berkeley.EDU (128.32.168.35)  39 ms  39 ms  39 ms
151  *      6  csgw.Berkeley.EDU (128.32.133.254)  39 ms  59 ms  39 ms
152  *      7  * * *
153  *      8  * * *
154  *      9  * * *
155  *     10  * * *
156  *     11  * * *
157  *     12  * * *
158  *     13  rip.Berkeley.EDU (128.32.131.22)  59 ms !  39 ms !  39 ms !
159  *
160  * Notice that there are 12 "gateways" (13 is the final
161  * destination) and exactly the last half of them are "missing".
162  * What's really happening is that rip (a Sun-3 running Sun OS3.5)
163  * is using the ttl from our arriving datagram as the ttl in its
164  * icmp reply.  So, the reply will time out on the return path
165  * (with no notice sent to anyone since icmp's aren't sent for
166  * icmp's) until we probe with a ttl that's at least twice the path
167  * length.  I.e., rip is really only 7 hops away.  A reply that
168  * returns with a ttl of 1 is a clue this problem exists.
169  * Traceroute prints a "!" after the time if the ttl is <= 1.
170  * Since vendors ship a lot of obsolete (DEC's Ultrix, Sun 3.x) or
171  * non-standard (HPUX) software, expect to see this problem
172  * frequently and/or take care picking the target host of your
173  * probes.
174  *
175  * Other possible annotations after the time are !H, !N, !P (got a host,
176  * network or protocol unreachable, respectively), !S or !F (source
177  * route failed or fragmentation needed -- neither of these should
178  * ever occur and the associated gateway is busted if you see one).  If
179  * almost all the probes result in some kind of unreachable, traceroute
180  * will give up and exit.
181  *
182  * Notes
183  * -----
184  * This program must be run by root or be setuid.  (I suggest that
185  * you *don't* make it setuid -- casual use could result in a lot
186  * of unnecessary traffic on our poor, congested nets.)
187  *
188  * This program requires a kernel mod that does not appear in any
189  * system available from Berkeley:  A raw ip socket using proto
190  * IPPROTO_RAW must interpret the data sent as an ip datagram (as
191  * opposed to data to be wrapped in a ip datagram).  See the README
192  * file that came with the source to this program for a description
193  * of the mods I made to /sys/netinet/raw_ip.c.  Your mileage may
194  * vary.  But, again, ANY 4.x (x < 4) BSD KERNEL WILL HAVE TO BE
195  * MODIFIED TO RUN THIS PROGRAM.
196  *
197  * The udp port usage may appear bizarre (well, ok, it is bizarre).
198  * The problem is that an icmp message only contains 8 bytes of
199  * data from the original datagram.  8 bytes is the size of a udp
200  * header so, if we want to associate replies with the original
201  * datagram, the necessary information must be encoded into the
202  * udp header (the ip id could be used but there's no way to
203  * interlock with the kernel's assignment of ip id's and, anyway,
204  * it would have taken a lot more kernel hacking to allow this
205  * code to set the ip id).  So, to allow two or more users to
206  * use traceroute simultaneously, we use this task's pid as the
207  * source port (the high bit is set to move the port number out
208  * of the "likely" range).  To keep track of which probe is being
209  * replied to (so times and/or hop counts don't get confused by a
210  * reply that was delayed in transit), we increment the destination
211  * port number before each probe.
212  *
213  * Don't use this as a coding example.  I was trying to find a
214  * routing problem and this code sort-of popped out after 48 hours
215  * without sleep.  I was amazed it ever compiled, much less ran.
216  *
217  * I stole the idea for this program from Steve Deering.  Since
218  * the first release, I've learned that had I attended the right
219  * IETF working group meetings, I also could have stolen it from Guy
220  * Almes or Matt Mathis.  I don't know (or care) who came up with
221  * the idea first.  I envy the originators' perspicacity and I'm
222  * glad they didn't keep the idea a secret.
223  *
224  * Tim Seaver, Ken Adelman and C. Philip Wood provided bug fixes and/or
225  * enhancements to the original distribution.
226  *
227  * I've hacked up a round-trip-route version of this that works by
228  * sending a loose-source-routed udp datagram through the destination
229  * back to yourself.  Unfortunately, SO many gateways botch source
230  * routing, the thing is almost worthless.  Maybe one day...
231  *
232  *  -- Van Jacobson (van@helios.ee.lbl.gov)
233  *     Tue Dec 20 03:50:13 PST 1988
234  */
235 
236 #include <sys/socket.h>
237 #include <sys/sysctl.h>
238 #include <sys/time.h>
239 #include <sys/uio.h>
240 
241 #include <netinet/in.h>
242 #include <netinet/ip.h>
243 #include <netinet/ip6.h>
244 #include <netinet/ip_icmp.h>
245 #include <netinet/icmp6.h>
246 #include <netinet/udp.h>
247 
248 #include <arpa/inet.h>
249 
250 #include <err.h>
251 #include <errno.h>
252 #include <event.h>
253 #include <limits.h>
254 #include <netdb.h>
255 #include <pwd.h>
256 #include <stdio.h>
257 #include <stdlib.h>
258 #include <string.h>
259 #include <unistd.h>
260 
261 #include "traceroute.h"
262 
263 int32_t	 sec_perturb;
264 int32_t	 usec_perturb;
265 
266 u_char	 packet[512];
267 u_char	*outpacket;	/* last inbound (icmp) packet */
268 
269 int	rcvsock;	/* receive (icmp) socket file descriptor */
270 int	sndsock;	/* send (udp) socket file descriptor */
271 
272 int	rcvhlim;
273 struct in6_pktinfo *rcvpktinfo;
274 
275 int	datalen;	/* How much data */
276 
277 char	*hostname;
278 
279 u_int16_t	srcport;
280 
281 void	usage(void);
282 
283 #define	TRACEROUTE_USER	"_traceroute"
284 
285 void	sock_read(int, short, void *);
286 void	send_timer(int, short, void *);
287 
288 struct tr_conf		*conf;	/* configuration defaults */
289 struct tr_result	*tr_results;
290 struct sockaddr_in	 from4, to4;
291 struct sockaddr_in6	 from6, to6;
292 struct sockaddr		*from, *to;
293 struct msghdr		 rcvmhdr;
294 struct event		 timer_ev;
295 int			 v6flag;
296 int			*waiting_ttls;
297 int			 last_tos = 0;
298 
/*
 * main - set up sockets, parse options, build the probe template and
 * run the event loop.
 *
 * The ICMP/raw sockets for both address families are opened first, while
 * the process may still be privileged.  The family actually used is
 * selected by the program name ("traceroute6" selects IPv6) and the
 * sockets of the other family are closed.  After option parsing the
 * outgoing packet buffer (outpacket) and the receive msghdr (rcvmhdr)
 * are prepared for the chosen family, and control is handed to libevent
 * with sock_read() on the receive socket and send_timer() on timer_ev.
 *
 * Exit status: 1 for usage and most setup errors, 5 when a socket could
 * not be created, 6 when a required setsockopt() failed.
 */
int
main(int argc, char *argv[])
{
	int	mib[4] = { CTL_NET, PF_INET, IPPROTO_IP, IPCTL_DEFTTL };
	char	hbuf[NI_MAXHOST];

	struct addrinfo		 hints, *res;
	struct ip		*ip = NULL;
	struct iovec		 rcviov[2];
	static u_char		*rcvcmsgbuf;
	struct passwd		*pw;
	struct event		 sock_ev;
	struct timeval		tv = {0, 0};

	long		 l;
	socklen_t	 len;
	size_t		 size;

	int		 ch;
	int		 on = 1;
	int		 error;
	int		 headerlen;	/* How long packet's header is */
	int		 i;
	int		 packetlen;
	int		 rcvcmsglen;
	int		 rcvsock4, rcvsock6;
	int		 sndsock4, sndsock6;
	u_int32_t	 tmprnd;
	int		 v4sock_errno, v6sock_errno;

	char		*dest;
	const char	*errstr;

	uid_t		 ouid, uid;
	gid_t		 gid;

	/* Cannot pledge due to special setsockopt()s below */
	if (unveil("/", "r") == -1)
		err(1, "unveil /");
	if (unveil(NULL, NULL) == -1)
		err(1, "unveil");

	if ((conf = calloc(1, sizeof(*conf))) == NULL)
		err(1,NULL);

	conf->first_ttl = 1;
	conf->proto = IPPROTO_UDP;
	conf->max_ttl = IPDEFTTL;
	conf->nprobes = 3;
	conf->expected_responses = 2; /* icmp + DNS */

	/* start udp dest port # for probe packets */
	conf->port = 32768+666;

	memset(&rcvmhdr, 0, sizeof(rcvmhdr));
	memset(&rcviov, 0, sizeof(rcviov));

	rcvsock4 = rcvsock6 = sndsock4 = sndsock6 = -1;
	v4sock_errno = v6sock_errno = 0;

	conf->waittime = 3 * 1000;

	/*
	 * Open the sockets for both families now; a failure is only
	 * remembered here and reported once the family in use is known.
	 */
	if ((rcvsock6 = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) == -1)
		v6sock_errno = errno;
	else if ((sndsock6 = socket(AF_INET6, SOCK_DGRAM, 0)) == -1)
		v6sock_errno = errno;

	if ((rcvsock4 = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) == -1)
		v4sock_errno = errno;
	else if ((sndsock4 = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) == -1)
		v4sock_errno = errno;

	/* revoke privs */
	ouid = getuid();
	if (ouid == 0 && (pw = getpwnam(TRACEROUTE_USER)) != NULL) {
		uid = pw->pw_uid;
		gid = pw->pw_gid;
	} else {
		uid = getuid();
		gid = getgid();
	}
	/*
	 * A non-root caller drops privileges right away; for root the same
	 * drop is performed after option parsing (see below).
	 */
	if (ouid && (setgroups(1, &gid) ||
	    setresgid(gid, gid, gid) ||
	    setresuid(uid, uid, uid)))
		err(1, "unable to revoke privs");

	if (strcmp("traceroute6", __progname) == 0) {
		v6flag = 1;
		if (v6sock_errno != 0)
			errc(5, v6sock_errno, rcvsock6 < 0 ? "socket(ICMPv6)" :
			    "socket(SOCK_DGRAM)");
		rcvsock = rcvsock6;
		sndsock = sndsock6;
		if (rcvsock4 >= 0)
			close(rcvsock4);
		if (sndsock4 >= 0)
			close(sndsock4);
	} else {
		if (v4sock_errno != 0)
			errc(5, v4sock_errno, rcvsock4 < 0 ? "icmp socket" :
			    "raw socket");
		rcvsock = rcvsock4;
		sndsock = sndsock4;
		if (rcvsock6 >= 0)
			close(rcvsock6);
		if (sndsock6 >= 0)
			close(sndsock6);
	}

	if (v6flag) {
		mib[1] = PF_INET6;
		mib[2] = IPPROTO_IPV6;
		mib[3] = IPV6CTL_DEFHLIM;
		/* specify to tell receiving interface */
		if (setsockopt(rcvsock, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on,
		    sizeof(on)) == -1)
			err(1, "setsockopt(IPV6_RECVPKTINFO)");

		/* specify to tell hoplimit field of received IP6 hdr */
		if (setsockopt(rcvsock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &on,
		    sizeof(on)) == -1)
			err(1, "setsockopt(IPV6_RECVHOPLIMIT)");
	}

	/* the system default ttl / hop limit becomes the default max ttl */
	size = sizeof(i);
	if (sysctl(mib, sizeof(mib)/sizeof(mib[0]), &i, &size, NULL, 0) == -1)
		err(1, "sysctl");
	conf->max_ttl = i;

	while ((ch = getopt(argc, argv, v6flag ? "ADdf:Ilm:np:q:Ss:t:w:vV:" :
	    "ADdf:g:Ilm:nP:p:q:Ss:t:V:vw:x")) != -1)
		switch (ch) {
		case 'A':
			/* count the extra response only once if -A repeats */
			if (!conf->Aflag) {
				conf->Aflag = 1;
				conf->expected_responses++;
			}
			break;
		case 'd':
			conf->dflag = 1;
			break;
		case 'D':
			conf->dump = 1;
			break;
		case 'f':
			conf->first_ttl = strtonum(optarg, 1, conf->max_ttl,
			    &errstr);
			if (errstr)
				errx(1, "min ttl must be 1 to %u.",
				    conf->max_ttl);
			break;
		case 'g':
			if (conf->lsrr >= MAX_LSRR)
				errx(1, "too many gateways; max %d", MAX_LSRR);
			memset(&hints, 0, sizeof(hints));
			hints.ai_family = AF_INET;

			if (getaddrinfo(optarg, NULL, &hints, &res) != 0)
				errx(1, "unknown host %s", optarg);

			conf->gateway[conf->lsrr] =
			    ((struct sockaddr_in *)res->ai_addr)->sin_addr;
			freeaddrinfo(res);

			/*
			 * The first gateway adds 4 extra bytes: the option
			 * header written below (NOP, LSRR, length, pointer).
			 */
			if (++conf->lsrr == 1)
				conf->lsrrlen = 4;
			conf->lsrrlen += 4;
			break;
		case 'I':
			if (conf->protoset)
				errx(1, "protocol already set with -P");
			conf->protoset = 1;
			conf->proto = IPPROTO_ICMP;
			break;
		case 'l':
			conf->ttl_flag = 1;
			break;
		case 'm':
			conf->max_ttl = strtonum(optarg, conf->first_ttl,
			    MAXTTL, &errstr);
			if (errstr)
				errx(1, "max ttl must be %u to %u.",
				    conf->first_ttl, MAXTTL);
			break;
		case 'n':
			/* drop the DNS response only once if -n repeats */
			if (!conf->nflag) {
				conf->nflag = 1;
				conf->expected_responses--;
			}
			break;
		case 'p':
			conf->port = strtonum(optarg, 1, 65535, &errstr);
			if (errstr)
				errx(1, "port must be >0, <65536.");
			break;
		case 'P':
			if (conf->protoset)
				errx(1, "protocol already set with -I");
			conf->protoset = 1;
			conf->proto = strtonum(optarg, 1, IPPROTO_MAX - 1,
			    &errstr);
			if (errstr) {
				struct protoent *pent;

				/* not a number: try it as a protocol name */
				pent = getprotobyname(optarg);
				if (pent)
					conf->proto = pent->p_proto;
				else
					errx(1, "proto must be >=1, or a "
					    "name.");
			}
			break;
		case 'q':
			conf->nprobes = strtonum(optarg, 1, 1024, &errstr);
			if (errstr)
				errx(1, "nprobes must be >0.");
			break;
		case 's':
			/*
			 * set the ip source address of the outbound
			 * probe (e.g., on a multi-homed host).
			 */
			conf->source = optarg;
			break;
		case 'S':
			conf->sump = 1;
			break;
		case 't':
			/* keyword first, then 0x-prefixed hex, then decimal */
			if (!map_tos(optarg, &conf->tos)) {
				if (strlen(optarg) > 1 && optarg[0] == '0' &&
				    optarg[1] == 'x') {
					char *ep;
					errno = 0;
					ep = NULL;
					l = strtol(optarg, &ep, 16);
					if (errno || !*optarg || *ep ||
					    l < 0 || l > 255)
						errx(1, "illegal tos value %s",
						    optarg);
					conf->tos = (int)l;
				} else {
					conf->tos = strtonum(optarg, 0, 255,
					    &errstr);
					if (errstr)
						errx(1, "illegal tos value %s",
						    optarg);
				}
			}
			conf->tflag = 1;
			last_tos = conf->tos;
			break;
		case 'v':
			conf->verbose = 1;
			break;
		case 'V':
			conf->rtableid = (unsigned int)strtonum(optarg, 0,
			    RT_TABLEID_MAX, &errstr);
			if (errstr)
				errx(1, "rtable value is %s: %s",
				    errstr, optarg);
			if (setsockopt(sndsock, SOL_SOCKET, SO_RTABLE,
			    &conf->rtableid, sizeof(conf->rtableid)) == -1)
				err(1, "setsockopt SO_RTABLE");
			if (setsockopt(rcvsock, SOL_SOCKET, SO_RTABLE,
			    &conf->rtableid, sizeof(conf->rtableid)) == -1)
				err(1, "setsockopt SO_RTABLE");
			break;
		case 'w':
			/*
			 * Bound the value so that the conversion to
			 * milliseconds below cannot overflow.
			 */
			conf->waittime = strtonum(optarg, 1, INT_MAX / 1000,
			    &errstr);
			if (errstr)
				errx(1, "wait must be >=1 sec.");
			conf->waittime *= 1000;
			break;
		case 'x':
			conf->xflag = 1;
			break;
		default:
			usage();
		}

	/* root drops to the uid/gid selected above now that options are done */
	if (ouid == 0 && (setgroups(1, &gid) ||
	    setresgid(gid, gid, gid) ||
	    setresuid(uid, uid, uid)))
		err(1, "unable to revoke privs");

	argc -= optind;
	argv += optind;

	if (argc < 1 || argc > 2)
		usage();

	/* one result slot per probe: max_ttl rows of nprobes probes each */
	tr_results = calloc(conf->max_ttl * conf->nprobes,
	    sizeof(struct tr_result));
	if (tr_results == NULL)
		err(1, NULL);

	/* per-row count of responses that are still outstanding */
	waiting_ttls = calloc(conf->max_ttl, sizeof(int));
	if (waiting_ttls == NULL)
		err(1, NULL);
	for (i = 0; i < conf->max_ttl; i++)
		waiting_ttls[i] = conf->nprobes * conf->expected_responses;

	setvbuf(stdout, NULL, _IOLBF, 0);

	conf->ident = (getpid() & 0xffff) | 0x8000;
	tmprnd = arc4random();
	sec_perturb = (tmprnd & 0x80000000) ? -(tmprnd & 0x7ff) :
	    (tmprnd & 0x7ff);
	usec_perturb = arc4random();

	memset(&to4, 0, sizeof(to4));
	memset(&to6, 0, sizeof(to6));

	dest = *argv;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = v6flag ? PF_INET6 : PF_INET;
	hints.ai_socktype = SOCK_RAW;
	hints.ai_protocol = 0;
	hints.ai_flags = AI_CANONNAME;
	if ((error = getaddrinfo(dest, NULL, &hints, &res)))
		errx(1, "%s", gai_strerror(error));

	switch (res->ai_family) {
	case AF_INET:
		to = (struct sockaddr *)&to4;
		from = (struct sockaddr *)&from4;
		break;
	case AF_INET6:
		to = (struct sockaddr *)&to6;
		from = (struct sockaddr *)&from6;
		break;
	default:
		errx(1, "unsupported AF: %d", res->ai_family);
		break;
	}

	memcpy(to, res->ai_addr, res->ai_addrlen);

	if (!hostname) {
		hostname = res->ai_canonname ? strdup(res->ai_canonname) : dest;
		if (!hostname)
			errx(1, "malloc");
	}

	if (res->ai_next) {
		if (getnameinfo(res->ai_addr, res->ai_addrlen, hbuf,
		    sizeof(hbuf), NULL, 0, NI_NUMERICHOST) != 0)
			strlcpy(hbuf, "?", sizeof(hbuf));
		warnx("Warning: %s has multiple "
		    "addresses; using %s", hostname, hbuf);
	}
	freeaddrinfo(res);

	/* optional second operand: amount of payload data */
	if (*++argv) {
		datalen = strtonum(*argv, 0, INT_MAX, &errstr);
		if (errstr)
			errx(1, "datalen out of range");
	}

	switch (to->sa_family) {
	case AF_INET:
		switch (conf->proto) {
		case IPPROTO_UDP:
			headerlen = (sizeof(struct ip) + conf->lsrrlen +
			    sizeof(struct udphdr) + sizeof(struct packetdata));
			break;
		case IPPROTO_ICMP:
			headerlen = (sizeof(struct ip) + conf->lsrrlen +
			    sizeof(struct icmp) + sizeof(struct packetdata));
			break;
		default:
			headerlen = (sizeof(struct ip) + conf->lsrrlen +
			    sizeof(struct packetdata));
		}

		if (datalen < 0 || datalen > IP_MAXPACKET - headerlen)
			errx(1, "packet size must be 0 to %d.",
			    IP_MAXPACKET - headerlen);

		datalen += headerlen;

		if ((outpacket = calloc(1, datalen)) == NULL)
			err(1, "calloc");

		/* initialize msghdr for receiving packets */
		rcviov[0].iov_base = (caddr_t)packet;
		rcviov[0].iov_len = sizeof(packet);
		rcvmhdr.msg_name = (caddr_t)&from4;
		rcvmhdr.msg_namelen = sizeof(from4);
		rcvmhdr.msg_iov = rcviov;
		rcvmhdr.msg_iovlen = 1;
		rcvmhdr.msg_control = NULL;
		rcvmhdr.msg_controllen = 0;

		ip = (struct ip *)outpacket;
		if (conf->lsrr != 0) {
			u_char *p = (u_char *)(ip + 1);

			/*
			 * Loose source route option: the packet is addressed
			 * to the first gateway, and the option lists the
			 * remaining gateways followed by the destination.
			 */
			*p++ = IPOPT_NOP;
			*p++ = IPOPT_LSRR;
			*p++ = conf->lsrrlen - 1;
			*p++ = IPOPT_MINOFF;
			conf->gateway[conf->lsrr] = to4.sin_addr;
			for (i = 1; i <= conf->lsrr; i++) {
				memcpy(p, &conf->gateway[i],
				    sizeof(struct in_addr));
				p += sizeof(struct in_addr);
			}
			ip->ip_dst = conf->gateway[0];
		} else
			ip->ip_dst = to4.sin_addr;
		ip->ip_off = htons(0);
		ip->ip_hl = (sizeof(struct ip) + conf->lsrrlen) >> 2;
		ip->ip_p = conf->proto;
		ip->ip_v = IPVERSION;
		ip->ip_tos = conf->tos;

		if (setsockopt(sndsock, IPPROTO_IP, IP_HDRINCL,
		    &on, sizeof(on)) == -1)
			err(6, "IP_HDRINCL");

		if (conf->source) {
			memset(&from4, 0, sizeof(from4));
			from4.sin_family = AF_INET;
			if (inet_pton(AF_INET, conf->source, &from4.sin_addr)
			    != 1)
				errx(1, "unknown host %s", conf->source);
			ip->ip_src = from4.sin_addr;
			/*
			 * A non-root caller may not use a 127/8 source with a
			 * destination outside 127/8, and must be able to
			 * bind() to the requested source address.
			 */
			if (ouid != 0 &&
			    (ntohl(from4.sin_addr.s_addr) & 0xff000000U) ==
			    0x7f000000U && (ntohl(to4.sin_addr.s_addr) &
			    0xff000000U) != 0x7f000000U)
				errx(1, "source is on 127/8, destination is"
				    " not");
			if (ouid && bind(sndsock, (struct sockaddr *)&from4,
			    sizeof(from4)) == -1)
				err(1, "bind");
		}
		packetlen = datalen;
		break;
	case AF_INET6:
		/*
		 * packetlen is the size of the complete IP packet sent and
		 * reported in the first line of output.
		 * For IPv4 this is equal to datalen since we are constructing
		 * a raw packet.
		 * For IPv6 we need to always add the size of the IP6 header
		 * and for UDP packets the size of the UDP header since they
		 * are prepended to the packet by the kernel
		 */
		packetlen = sizeof(struct ip6_hdr);
		switch (conf->proto) {
		case IPPROTO_UDP:
			headerlen = sizeof(struct packetdata);
			packetlen += sizeof(struct udphdr);
			break;
		case IPPROTO_ICMP:
			headerlen = sizeof(struct icmp6_hdr) +
			    sizeof(struct packetdata);
			break;
		default:
			errx(1, "Unsupported proto: %hhu", conf->proto);
			break;
		}

		if (datalen < 0 || datalen > IP_MAXPACKET - headerlen)
			errx(1, "packet size must be 0 to %d.",
			    IP_MAXPACKET - headerlen);

		datalen += headerlen;
		packetlen += datalen;

		if ((outpacket = calloc(1, datalen)) == NULL)
			err(1, "calloc");

		/* initialize msghdr for receiving packets */
		rcviov[0].iov_base = (caddr_t)packet;
		rcviov[0].iov_len = sizeof(packet);
		rcvmhdr.msg_name = (caddr_t)&from6;
		rcvmhdr.msg_namelen = sizeof(from6);
		rcvmhdr.msg_iov = rcviov;
		rcvmhdr.msg_iovlen = 1;
		/* room for the packet info and hop limit control messages */
		rcvcmsglen = CMSG_SPACE(sizeof(struct in6_pktinfo)) +
		    CMSG_SPACE(sizeof(int));

		if ((rcvcmsgbuf = malloc(rcvcmsglen)) == NULL)
			errx(1, "malloc");
		rcvmhdr.msg_control = (caddr_t) rcvcmsgbuf;
		rcvmhdr.msg_controllen = rcvcmsglen;

		/*
		 * Send UDP or ICMP
		 */
		if (conf->proto == IPPROTO_ICMP) {
			/* ICMP probes go out on the ICMPv6 receive socket */
			close(sndsock);
			sndsock = rcvsock;
		}

		/*
		 * Source selection
		 */
		memset(&from6, 0, sizeof(from6));
		if (conf->source) {
			memset(&hints, 0, sizeof(hints));
			hints.ai_family = AF_INET6;
			hints.ai_socktype = SOCK_DGRAM;	/*dummy*/
			hints.ai_flags = AI_NUMERICHOST;
			if ((error = getaddrinfo(conf->source, "0", &hints,
			    &res)))
				errx(1, "%s: %s", conf->source,
				    gai_strerror(error));
			memcpy(&from6, res->ai_addr, res->ai_addrlen);
			freeaddrinfo(res);
		} else {
			struct sockaddr_in6 nxt;
			int dummy;

			/*
			 * No source given: connect a throw-away UDP socket
			 * to the destination and take the local address the
			 * kernel picked for it.
			 */
			nxt = to6;
			nxt.sin6_port = htons(DUMMY_PORT);
			if ((dummy = socket(AF_INET6, SOCK_DGRAM, 0)) == -1)
				err(1, "socket");
			if (conf->rtableid > 0 &&
			    setsockopt(dummy, SOL_SOCKET, SO_RTABLE,
			    &conf->rtableid, sizeof(conf->rtableid)) == -1)
				err(1, "setsockopt(SO_RTABLE)");
			if (connect(dummy, (struct sockaddr *)&nxt,
			    nxt.sin6_len) == -1)
				err(1, "connect");
			len = sizeof(from6);
			if (getsockname(dummy, (struct sockaddr *)&from6,
			    &len) == -1)
				err(1, "getsockname");
			close(dummy);
		}

		from6.sin6_port = htons(0);
		if (bind(sndsock, (struct sockaddr *)&from6, from6.sin6_len) == -1)
			err(1, "bind sndsock");

		if (conf->tflag) {
			if (setsockopt(sndsock, IPPROTO_IPV6, IPV6_TCLASS,
			    &conf->tos, sizeof(conf->tos)) == -1)
				err(6, "IPV6_TCLASS");
		}

		/* remember the local port that bind() assigned */
		len = sizeof(from6);
		if (getsockname(sndsock, (struct sockaddr *)&from6, &len) == -1)
			err(1, "getsockname");
		srcport = ntohs(from6.sin6_port);
		break;
	default:
		errx(1, "unsupported AF: %d", to->sa_family);
		break;
	}

	if (conf->dflag) {
		(void) setsockopt(rcvsock, SOL_SOCKET, SO_DEBUG,
		    &on, sizeof(on));
		(void) setsockopt(sndsock, SOL_SOCKET, SO_DEBUG,
		    &on, sizeof(on));
	}

	if (setsockopt(sndsock, SOL_SOCKET, SO_SNDBUF,
	    &datalen, sizeof(datalen)) == -1)
		err(6, "SO_SNDBUF");

	/* the "dns" promise is only requested when -n is not in effect or -A is */
	if (conf->nflag && !conf->Aflag) {
		if (pledge("stdio inet", NULL) == -1)
			err(1, "pledge");
	} else {
		if (pledge("stdio inet dns", NULL) == -1)
			err(1, "pledge");
	}

	if (getnameinfo(to, to->sa_len, hbuf,
	    sizeof(hbuf), NULL, 0, NI_NUMERICHOST))
		strlcpy(hbuf, "(invalid)", sizeof(hbuf));
	fprintf(stderr, "%s to %s (%s)", __progname, hostname, hbuf);
	if (conf->source)
		fprintf(stderr, " from %s", conf->source);
	fprintf(stderr, ", %u hops max, %d byte packets\n", conf->max_ttl,
	    packetlen);
	(void) fflush(stderr);

	if (conf->first_ttl > 1)
		printf("Skipping %u intermediate hops\n", conf->first_ttl - 1);

	event_init();

	/* replies are handled by sock_read(); the zero timeout starts probing */
	event_set(&sock_ev, rcvsock, EV_READ | EV_PERSIST, sock_read, NULL);
	event_add(&sock_ev, NULL);
	evtimer_set(&timer_ev, send_timer, &timer_ev);
	evtimer_add(&timer_ev, &tv);
	event_dispatch();

	return (0);
}
888 
889 void
usage(void)890 usage(void)
891 {
892 	if (v6flag) {
893 		fprintf(stderr, "usage: %s "
894 		    "[-ADdIlnSv] [-f first_hop] [-m max_hop] [-p port]\n"
895 		    "\t[-q nqueries] [-s sourceaddr] [-t toskeyword] [-V rtable] "
896 		    "[-w waittime]\n\thost [datalen]\n", __progname);
897 	} else {
898 		fprintf(stderr,
899 		    "usage: %s [-ADdIlnSvx] [-f first_ttl] [-g gateway_addr] "
900 		    "[-m max_ttl]\n"
901 		    "\t[-P proto] [-p port] [-q nqueries] [-s sourceaddr]\n"
902 		    "\t[-t toskeyword] "
903 		    "[-V rtable] [-w waittime] host [datalen]\n",
904 		    __progname);
905 	}
906 	exit(1);
907 }
908 
909 void
sock_read(int fd,short events,void * arg)910 sock_read(int fd, short events, void *arg)
911 {
912 	struct ip	*ip;
913 	struct timeval	 t2, tv = {0, 0};
914 	int		 pkg_ok, cc, recv_seq, recv_seq_row;
915 	char		 hbuf[NI_MAXHOST];
916 
917 	cc = recvmsg(rcvsock, &rcvmhdr, 0);
918 
919 	if (cc == 0)
920 		return;
921 
922 	evtimer_add(&timer_ev, &tv);
923 
924 	gettime(&t2);
925 
926 	pkg_ok = packet_ok(conf, to->sa_family, &rcvmhdr, cc, &recv_seq);
927 
928 	/* Skip wrong packet */
929 	if (pkg_ok == 0)
930 		goto out;
931 
932 	/* skip corrupt sequence number */
933 	if (recv_seq < 0 || recv_seq >= conf->max_ttl * conf->nprobes)
934 		goto out;
935 
936 	recv_seq_row = recv_seq / conf->nprobes;
937 
938 	/* skipping dup */
939 	if (tr_results[recv_seq].dup++)
940 		goto out;
941 
942 	switch (to->sa_family) {
943 	case AF_INET:
944 		ip = (struct ip *)packet;
945 
946 		print(conf, from, cc - (ip->ip_hl << 2), inet_ntop(AF_INET,
947 		    &ip->ip_dst, hbuf, sizeof(hbuf)), &tr_results[recv_seq]);
948 		break;
949 	case AF_INET6:
950 		print(conf, from, cc, rcvpktinfo ? inet_ntop(AF_INET6,
951 		    &rcvpktinfo->ipi6_addr, hbuf, sizeof(hbuf)) : "?",
952 		    &tr_results[recv_seq]);
953 		break;
954 	default:
955 		errx(1, "unsupported AF: %d", to->sa_family);
956 	}
957 
958 	tr_results[recv_seq].t2 = t2;
959 	tr_results[recv_seq].resp_ttl = v6flag ? rcvhlim : ip->ip_ttl;
960 
961 	waiting_ttls[recv_seq_row]--;
962 
963 	if (pkg_ok == -2) {
964 		if ((v6flag && rcvhlim <= 1) ||
965 		    (!v6flag && ip->ip_ttl <=1))
966 			snprintf(tr_results[recv_seq].icmp_code,
967 			    sizeof(tr_results[recv_seq].icmp_code), "%s", " !");
968 		tr_results[recv_seq].got_there++;
969 	} else {
970 		if (to->sa_family == AF_INET && conf->tflag)
971 			check_tos(ip, &last_tos, &tr_results[recv_seq]);
972 		if (pkg_ok != -1) {
973 			icmp_code(to->sa_family, pkg_ok - 1,
974 			    &tr_results[recv_seq].got_there,
975 			    &tr_results[recv_seq].unreachable,
976 			    &tr_results[recv_seq]);
977 		}
978 	}
979 
980 	if (cc && ((recv_seq + 1) % conf->nprobes) == 0 &&
981 	    (conf->xflag || conf->verbose))
982 		print_exthdr(packet, cc, &tr_results[recv_seq]);
983  out:
984 	catchup_result_rows(tr_results, conf);
985 }
986 
987 void
send_timer(int fd,short events,void * arg)988 send_timer(int fd, short events, void *arg)
989 {
990 	static int	 seq;
991 	struct timeval	 tv = {0, 30000}, t1;
992 	struct event	*ev = arg;
993 	int		 ttl;
994 
995 	evtimer_add(ev, &tv);
996 
997 	ttl = conf->first_ttl + seq / conf->nprobes;
998 	if (ttl <= conf->max_ttl) {
999 		gettime(&t1);
1000 		tr_results[seq].seq = seq;
1001 		tr_results[seq].row = seq / conf->nprobes;
1002 		tr_results[seq].ttl = ttl;
1003 		tr_results[seq].t1 = t1;
1004 		send_probe(conf, seq, ttl, to);
1005 		seq++;
1006 	}
1007 
1008 	catchup_result_rows(tr_results, conf);
1009 
1010 }
1011