xref: /openbsd/usr.sbin/traceroute/traceroute.c (revision 8932bfb7)
1 /*	$OpenBSD: traceroute.c,v 1.77 2011/06/21 17:31:07 mikeb Exp $	*/
2 /*	$NetBSD: traceroute.c,v 1.10 1995/05/21 15:50:45 mycroft Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Van Jacobson.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 /*
37  * traceroute host  - trace the route ip packets follow going to "host".
38  *
39  * Attempt to trace the route an ip packet would follow to some
40  * internet host.  We find out intermediate hops by launching probe
41  * packets with a small ttl (time to live) then listening for an
42  * icmp "time exceeded" reply from a gateway.  We start our probes
43  * with a ttl of one and increase by one until we get an icmp "port
44  * unreachable" (which means we got to "host") or hit a max (which
45  * defaults to 64 hops & can be changed with the -m flag).  Three
46  * probes (change with -q flag) are sent at each ttl setting and a
47  * line is printed showing the ttl, address of the gateway and
48  * round trip time of each probe.  If the probe answers come from
49  * different gateways, the address of each responding system will
50  * be printed.  If there is no response within a 5 sec. timeout
51  * interval (changed with the -w flag), a "*" is printed for that
52  * probe.
53  *
54  * Probe packets are UDP format.  We don't want the destination
55  * host to process them so the destination port is set to an
56  * unlikely value (if some clod on the destination is using that
57  * value, it can be changed with the -p flag).
58  *
59  * A sample use might be:
60  *
61  *     [yak 71]% traceroute nis.nsf.net.
62  *     traceroute to nis.nsf.net (35.1.1.48), 64 hops max, 56 byte packet
63  *      1  helios.ee.lbl.gov (128.3.112.1)  19 ms  19 ms  0 ms
64  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
65  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
66  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  39 ms
67  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  39 ms  39 ms  39 ms
68  *      6  128.32.197.4 (128.32.197.4)  40 ms  59 ms  59 ms
69  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  59 ms
70  *      8  129.140.70.13 (129.140.70.13)  99 ms  99 ms  80 ms
71  *      9  129.140.71.6 (129.140.71.6)  139 ms  239 ms  319 ms
72  *     10  129.140.81.7 (129.140.81.7)  220 ms  199 ms  199 ms
73  *     11  nic.merit.edu (35.1.1.48)  239 ms  239 ms  239 ms
74  *
75  * Note that lines 2 & 3 are the same.  This is due to a buggy
76  * kernel on the 2nd hop system -- lbl-csam.arpa -- that forwards
77  * packets with a zero ttl.
78  *
79  * A more interesting example is:
80  *
81  *     [yak 72]% traceroute allspice.lcs.mit.edu.
82  *     traceroute to allspice.lcs.mit.edu (18.26.0.115), 64 hops max
83  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
84  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  19 ms  19 ms
85  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  19 ms
86  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  19 ms  39 ms  39 ms
87  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  20 ms  39 ms  39 ms
88  *      6  128.32.197.4 (128.32.197.4)  59 ms  119 ms  39 ms
89  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  39 ms
90  *      8  129.140.70.13 (129.140.70.13)  80 ms  79 ms  99 ms
91  *      9  129.140.71.6 (129.140.71.6)  139 ms  139 ms  159 ms
92  *     10  129.140.81.7 (129.140.81.7)  199 ms  180 ms  300 ms
93  *     11  129.140.72.17 (129.140.72.17)  300 ms  239 ms  239 ms
94  *     12  * * *
95  *     13  128.121.54.72 (128.121.54.72)  259 ms  499 ms  279 ms
96  *     14  * * *
97  *     15  * * *
98  *     16  * * *
99  *     17  * * *
100  *     18  ALLSPICE.LCS.MIT.EDU (18.26.0.115)  339 ms  279 ms  279 ms
101  *
102  * (I start to see why I'm having so much trouble with mail to
103  * MIT.)  Note that the gateways 12, 14, 15, 16 & 17 hops away
104  * either don't send ICMP "time exceeded" messages or send them
105  * with a ttl too small to reach us.  14 - 17 are running the
106  * MIT C Gateway code that doesn't send "time exceeded"s.  God
107  * only knows what's going on with 12.
108  *
109  * The silent gateway 12 in the above may be the result of a bug in
110  * the 4.[23]BSD network code (and its derivatives):  4.x (x <= 3)
111  * sends an unreachable message using whatever ttl remains in the
112  * original datagram.  Since, for gateways, the remaining ttl is
113  * zero, the icmp "time exceeded" is guaranteed to not make it back
114  * to us.  The behavior of this bug is slightly more interesting
115  * when it appears on the destination system:
116  *
117  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
118  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  39 ms
119  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  39 ms  19 ms
120  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  19 ms
121  *      5  ccn-nerif35.Berkeley.EDU (128.32.168.35)  39 ms  39 ms  39 ms
122  *      6  csgw.Berkeley.EDU (128.32.133.254)  39 ms  59 ms  39 ms
123  *      7  * * *
124  *      8  * * *
125  *      9  * * *
126  *     10  * * *
127  *     11  * * *
128  *     12  * * *
129  *     13  rip.Berkeley.EDU (128.32.131.22)  59 ms !  39 ms !  39 ms !
130  *
131  * Notice that there are 12 "gateways" (13 is the final
132  * destination) and exactly the last half of them are "missing".
133  * What's really happening is that rip (a Sun-3 running Sun OS3.5)
134  * is using the ttl from our arriving datagram as the ttl in its
135  * icmp reply.  So, the reply will time out on the return path
136  * (with no notice sent to anyone since icmp's aren't sent for
137  * icmp's) until we probe with a ttl that's at least twice the path
138  * length.  I.e., rip is really only 7 hops away.  A reply that
139  * returns with a ttl of 1 is a clue this problem exists.
140  * Traceroute prints a "!" after the time if the ttl is <= 1.
141  * Since vendors ship a lot of obsolete (DEC's Ultrix, Sun 3.x) or
142  * non-standard (HPUX) software, expect to see this problem
143  * frequently and/or take care picking the target host of your
144  * probes.
145  *
146  * Other possible annotations after the time are !H, !N, !P (got a host,
147  * network or protocol unreachable, respectively), !S or !F (source
148  * route failed or fragmentation needed -- neither of these should
149  * ever occur and the associated gateway is busted if you see one).  If
150  * almost all the probes result in some kind of unreachable, traceroute
151  * will give up and exit.
152  *
153  * Notes
154  * -----
155  * This program must be run by root or be setuid.  (I suggest that
156  * you *don't* make it setuid -- casual use could result in a lot
157  * of unnecessary traffic on our poor, congested nets.)
158  *
159  * This program requires a kernel mod that does not appear in any
160  * system available from Berkeley:  A raw ip socket using proto
161  * IPPROTO_RAW must interpret the data sent as an ip datagram (as
162  * opposed to data to be wrapped in a ip datagram).  See the README
163  * file that came with the source to this program for a description
164  * of the mods I made to /sys/netinet/raw_ip.c.  Your mileage may
165  * vary.  But, again, ANY 4.x (x < 4) BSD KERNEL WILL HAVE TO BE
166  * MODIFIED TO RUN THIS PROGRAM.
167  *
168  * The udp port usage may appear bizarre (well, ok, it is bizarre).
169  * The problem is that an icmp message only contains 8 bytes of
170  * data from the original datagram.  8 bytes is the size of a udp
171  * header so, if we want to associate replies with the original
172  * datagram, the necessary information must be encoded into the
173  * udp header (the ip id could be used but there's no way to
174  * interlock with the kernel's assignment of ip id's and, anyway,
175  * it would have taken a lot more kernel hacking to allow this
176  * code to set the ip id).  So, to allow two or more users to
177  * use traceroute simultaneously, we use this task's pid as the
178  * source port (the high bit is set to move the port number out
179  * of the "likely" range).  To keep track of which probe is being
180  * replied to (so times and/or hop counts don't get confused by a
181  * reply that was delayed in transit), we increment the destination
182  * port number before each probe.
183  *
184  * Don't use this as a coding example.  I was trying to find a
185  * routing problem and this code sort-of popped out after 48 hours
186  * without sleep.  I was amazed it ever compiled, much less ran.
187  *
188  * I stole the idea for this program from Steve Deering.  Since
189  * the first release, I've learned that had I attended the right
190  * IETF working group meetings, I also could have stolen it from Guy
191  * Almes or Matt Mathis.  I don't know (or care) who came up with
192  * the idea first.  I envy the originators' perspicacity and I'm
193  * glad they didn't keep the idea a secret.
194  *
195  * Tim Seaver, Ken Adelman and C. Philip Wood provided bug fixes and/or
196  * enhancements to the original distribution.
197  *
198  * I've hacked up a round-trip-route version of this that works by
199  * sending a loose-source-routed udp datagram through the destination
200  * back to yourself.  Unfortunately, SO many gateways botch source
201  * routing, the thing is almost worthless.  Maybe one day...
202  *
203  *  -- Van Jacobson (van@helios.ee.lbl.gov)
204  *     Tue Dec 20 03:50:13 PST 1988
205  */
206 
207 #include <sys/param.h>
208 #include <sys/time.h>
209 #include <sys/socket.h>
210 #include <sys/file.h>
211 #include <sys/ioctl.h>
212 #include <sys/sysctl.h>
213 
214 #include <netinet/in_systm.h>
215 #include <netinet/in.h>
216 #include <netinet/ip.h>
217 #include <netinet/ip_icmp.h>
218 #include <netinet/ip_var.h>
219 #include <netinet/udp.h>
220 
221 #include <arpa/inet.h>
222 
223 #include <netmpls/mpls.h>
224 
225 #include <ctype.h>
226 #include <err.h>
227 #include <errno.h>
228 #include <netdb.h>
229 #include <stdio.h>
230 #include <stdlib.h>
231 #include <string.h>
232 #include <unistd.h>
233 
234 #define MAX_LSRR		((MAX_IPOPTLEN - 4) / 4)
235 
236 #define MPLS_LABEL(m)		((m & MPLS_LABEL_MASK) >> MPLS_LABEL_OFFSET)
237 #define MPLS_EXP(m)		((m & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET)
238 
239 /*
240  * Format of the data in a (udp) probe packet.
241  */
242 struct packetdata {
243 	u_char seq;		/* sequence number of this packet */
244 	u_int8_t ttl;		/* ttl packet left with */
245 	u_char pad[2];
246 	u_int32_t sec;		/* time packet left */
247 	u_int32_t usec;
248 } __packed;
249 
250 struct in_addr gateway[MAX_LSRR + 1];
251 int lsrrlen = 0;
252 int32_t sec_perturb;
253 int32_t usec_perturb;
254 
255 u_char packet[512], *outpacket;	/* last inbound (icmp) packet */
256 
257 int wait_for_reply(int, struct sockaddr_in *, struct timeval *);
258 void send_probe(int, u_int8_t, int, struct sockaddr_in *);
259 int packet_ok(u_char *, int, struct sockaddr_in *, int, int);
260 void print_exthdr(u_char *, int);
261 void print(u_char *, int, struct sockaddr_in *);
262 char *inetname(struct in_addr);
263 u_short in_cksum(u_short *, int);
264 void usage(void);
265 
266 int s;				/* receive (icmp) socket file descriptor */
267 int sndsock;			/* send (udp) socket file descriptor */
268 
269 int datalen;			/* How much data */
270 int headerlen;			/* How long packet's header is */
271 
272 char *source = 0;
273 char *hostname;
274 
275 int nprobes = 3;
276 u_int8_t max_ttl = IPDEFTTL;
277 u_int8_t first_ttl = 1;
278 u_short ident;
279 u_short port = 32768+666;	/* start udp dest port # for probe packets */
280 u_char	proto = IPPROTO_UDP;
281 u_int8_t  icmp_type = ICMP_ECHO; /* default ICMP code/type */
282 u_char  icmp_code = 0;
283 int options;			/* socket options */
284 int verbose;
285 int waittime = 5;		/* time to wait for response (in seconds) */
286 int nflag;			/* print addresses numerically */
287 int dump;
288 int xflag;			/* show ICMP extension header */
289 int tflag;			/* tos flag was set */
290 
291 int
292 main(int argc, char *argv[])
293 {
294 	int mib[4] = { CTL_NET, PF_INET, IPPROTO_IP, IPCTL_DEFTTL };
295 	int ttl_flag = 0, incflag = 1, protoset = 0, sump = 0;
296 	int ch, i, lsrr = 0, on = 1, probe, seq = 0, tos = 0;
297 	int last_tos, tos_returned;
298 	size_t size = sizeof(max_ttl);
299 	struct sockaddr_in from, to;
300 	struct hostent *hp;
301 	u_int32_t tmprnd;
302 	struct ip *ip, *inner_ip;
303 	struct icmp *icp;
304 	u_int8_t ttl;
305 	char *ep;
306 	const char *errstr;
307 	long l;
308 	uid_t uid;
309 	u_int rtableid;
310 
311 	if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0)
312 		err(5, "icmp socket");
313 	if ((sndsock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0)
314 		err(5, "raw socket");
315 
316 	/* revoke privs */
317 	uid = getuid();
318 	if (setresuid(uid, uid, uid) == -1)
319 		err(1, "setresuid");
320 
321 	(void) sysctl(mib, sizeof(mib)/sizeof(mib[0]), &max_ttl, &size,
322 	    NULL, 0);
323 
324 	while ((ch = getopt(argc, argv, "cDdf:g:Ilm:nP:p:q:rSs:t:V:vw:x"))
325 			!= -1)
326 		switch (ch) {
327 		case 'S':
328 			sump = 1;
329 			break;
330 		case 'f':
331 			errno = 0;
332 			ep = NULL;
333 			l = strtol(optarg, &ep, 10);
334 			if (errno || !*optarg || *ep || l < 1 || l > max_ttl)
335 				errx(1, "min ttl must be 1 to %u.", max_ttl);
336 			first_ttl = (u_int8_t)l;
337 			break;
338 		case 'c':
339 			incflag = 0;
340 			break;
341 		case 'd':
342 			options |= SO_DEBUG;
343 			break;
344 		case 'D':
345 			dump = 1;
346 			break;
347 		case 'g':
348 			if (lsrr >= MAX_LSRR)
349 				errx(1, "too many gateways; max %d", MAX_LSRR);
350 			if (inet_aton(optarg, &gateway[lsrr]) == 0) {
351 				hp = gethostbyname(optarg);
352 				if (hp == 0)
353 					errx(1, "unknown host %s", optarg);
354 				memcpy(&gateway[lsrr], hp->h_addr, hp->h_length);
355 			}
356 			if (++lsrr == 1)
357 				lsrrlen = 4;
358 			lsrrlen += 4;
359 			break;
360 		case 'I':
361 			if (protoset)
362 				errx(1, "protocol already set with -P");
363 			protoset = 1;
364 			proto = IPPROTO_ICMP;
365 			break;
366 		case 'l':
367 			ttl_flag++;
368 			break;
369 		case 'm':
370 			errno = 0;
371 			ep = NULL;
372 			l = strtol(optarg, &ep, 10);
373 			if (errno || !*optarg || *ep || l < first_ttl ||
374 			    l > MAXTTL)
375 				errx(1, "max ttl must be %u to %u.", first_ttl,
376 				    MAXTTL);
377 			max_ttl = (u_int8_t)l;
378 			break;
379 		case 'n':
380 			nflag++;
381 			break;
382 		case 'p':
383 			errno = 0;
384 			ep = NULL;
385 			l = strtol(optarg, &ep, 10);
386 			if (errno || !*optarg || *ep || l <= 0 || l >= 65536)
387 				errx(1, "port must be >0, <65536.");
388 			port = (int)l;
389 			break;
390 		case 'P':
391 			if (protoset)
392 				errx(1, "protocol already set with -I");
393 			protoset = 1;
394 			errno = 0;
395 			ep = NULL;
396 			l = strtol(optarg, &ep, 10);
397 			if (errno || !*optarg || *ep || l < 1 ||
398 			    l >= IPPROTO_MAX) {
399 				struct protoent *pent;
400 
401 				pent = getprotobyname(optarg);
402 				if (pent)
403 					proto = pent->p_proto;
404 				else
405 					errx(1, "proto must be >=1, or a name.");
406 			} else
407 				proto = (int)l;
408 			break;
409 		case 'q':
410 			errno = 0;
411 			ep = NULL;
412 			l = strtol(optarg, &ep, 10);
413 			if (errno || !*optarg || *ep || l < 1 || l > INT_MAX)
414 				errx(1, "nprobes must be >0.");
415 			nprobes = (int)l;
416 			break;
417 		case 'r':
418 			options |= SO_DONTROUTE;
419 			break;
420 		case 's':
421 			/*
422 			 * set the ip source address of the outbound
423 			 * probe (e.g., on a multi-homed host).
424 			 */
425 			source = optarg;
426 			break;
427 		case 't':
428 			errno = 0;
429 			ep = NULL;
430 			l = strtol(optarg, &ep, 10);
431 			if (errno || !*optarg || *ep || l < 0 || l > 255)
432 				errx(1, "tos must be 0 to 255.");
433 			last_tos = tos = (int)l;
434 			tflag = 1;
435 			break;
436 		case 'v':
437 			verbose++;
438 			break;
439 		case 'V':
440 			rtableid = (unsigned int)strtonum(optarg, 0,
441 			    RT_TABLEID_MAX, &errstr);
442 			if (errstr)
443 				errx(1, "rtable value is %s: %s",
444 				    errstr, optarg);
445 			if (setsockopt(sndsock, SOL_SOCKET, SO_RTABLE,
446 			    &rtableid, sizeof(rtableid)) == -1)
447 				err(1, "setsockopt SO_RTABLE");
448 			if (setsockopt(s, SOL_SOCKET, SO_RTABLE,
449 			    &rtableid, sizeof(rtableid)) == -1)
450 				err(1, "setsockopt SO_RTABLE");
451 			break;
452 		case 'w':
453 			errno = 0;
454 			ep = NULL;
455 			l = strtol(optarg, &ep, 10);
456 			if (errno || !*optarg || *ep || l <= 1 || l > INT_MAX)
457 				errx(1, "wait must be >1 sec.");
458 			waittime = (int)l;
459 			break;
460 		case 'x':
461 			xflag = 1;
462 			break;
463 		default:
464 			usage();
465 		}
466 	argc -= optind;
467 	argv += optind;
468 
469 	if (argc < 1)
470 		usage();
471 
472 	setlinebuf (stdout);
473 
474 	(void) memset(&to, 0, sizeof(struct sockaddr));
475 	to.sin_family = AF_INET;
476 	if (inet_aton(*argv, &to.sin_addr) != 0)
477 		hostname = *argv;
478 	else {
479 		hp = gethostbyname(*argv);
480 		if (hp == 0)
481 			errx(1, "unknown host %s", *argv);
482 		to.sin_family = hp->h_addrtype;
483 		memcpy(&to.sin_addr, hp->h_addr, hp->h_length);
484 		if ((hostname = strdup(hp->h_name)) == NULL)
485 			err(1, "malloc");
486 		if (hp->h_addr_list[1] != NULL)
487 			warnx("Warning: %s has multiple addresses; using %s",
488 			    hostname, inet_ntoa(to.sin_addr));
489 	}
490 	if (*++argv) {
491 		errno = 0;
492 		ep = NULL;
493 		l = strtol(*argv, &ep, 10);
494 		if (errno || !*argv || *ep || l < 0 || l > INT_MAX)
495 			errx(1, "datalen out of range");
496 		datalen = (int)l;
497 	}
498 
499 	switch (proto) {
500 	case IPPROTO_UDP:
501 		headerlen = (sizeof(struct ip) + lsrrlen +
502 		    sizeof(struct udphdr) + sizeof(struct packetdata));
503 		break;
504 	case IPPROTO_ICMP:
505 		headerlen = (sizeof(struct ip) + lsrrlen +
506 		    sizeof(struct icmp) + sizeof(struct packetdata));
507 		break;
508 	default:
509 		headerlen = (sizeof(struct ip) + lsrrlen +
510 		    sizeof(struct packetdata));
511 	}
512 
513 	if (datalen < 0 || datalen > IP_MAXPACKET - headerlen)
514 		errx(1, "packet size must be 0 to %d.",
515 		    IP_MAXPACKET - headerlen);
516 
517 	datalen += headerlen;
518 
519 	outpacket = malloc(datalen);
520 	if (outpacket == 0)
521 		err(1, "malloc");
522 	(void) memset(outpacket, 0, datalen);
523 
524 	ip = (struct ip *)outpacket;
525 	if (lsrr != 0) {
526 		u_char *p = (u_char *)(ip + 1);
527 
528 		*p++ = IPOPT_NOP;
529 		*p++ = IPOPT_LSRR;
530 		*p++ = lsrrlen - 1;
531 		*p++ = IPOPT_MINOFF;
532 		gateway[lsrr] = to.sin_addr;
533 		for (i = 1; i <= lsrr; i++) {
534 			memcpy(p, &gateway[i], sizeof(struct in_addr));
535 			p += sizeof(struct in_addr);
536 		}
537 		ip->ip_dst = gateway[0];
538 	} else
539 		ip->ip_dst = to.sin_addr;
540 	ip->ip_off = htons(0);
541 	ip->ip_hl = (sizeof(struct ip) + lsrrlen) >> 2;
542 	ip->ip_p = proto;
543 	ip->ip_v = IPVERSION;
544 	ip->ip_tos = tos;
545 
546 	ident = (getpid() & 0xffff) | 0x8000;
547 	tmprnd = arc4random();
548 	sec_perturb = (tmprnd & 0x80000000) ? -(tmprnd & 0x7ff) :
549 	    (tmprnd & 0x7ff);
550 	usec_perturb = arc4random();
551 
552 	if (options & SO_DEBUG)
553 		(void) setsockopt(s, SOL_SOCKET, SO_DEBUG,
554 		    (char *)&on, sizeof(on));
555 #ifdef SO_SNDBUF
556 	if (setsockopt(sndsock, SOL_SOCKET, SO_SNDBUF, (char *)&datalen,
557 	    sizeof(datalen)) < 0)
558 		err(6, "SO_SNDBUF");
559 #endif /* SO_SNDBUF */
560 #ifdef IP_HDRINCL
561 	if (setsockopt(sndsock, IPPROTO_IP, IP_HDRINCL, (char *)&on,
562 	    sizeof(on)) < 0)
563 		err(6, "IP_HDRINCL");
564 #endif /* IP_HDRINCL */
565 	if (options & SO_DEBUG)
566 		(void) setsockopt(sndsock, SOL_SOCKET, SO_DEBUG,
567 		    (char *)&on, sizeof(on));
568 	if (options & SO_DONTROUTE)
569 		(void) setsockopt(sndsock, SOL_SOCKET, SO_DONTROUTE,
570 		    (char *)&on, sizeof(on));
571 
572 	if (source) {
573 		(void) memset(&from, 0, sizeof(struct sockaddr));
574 		from.sin_family = AF_INET;
575 		if (inet_aton(source, &from.sin_addr) == 0)
576 			errx(1, "unknown host %s", source);
577 		ip->ip_src = from.sin_addr;
578 		if (getuid() != 0 &&
579 		    (ntohl(from.sin_addr.s_addr) & 0xff000000U) == 0x7f000000U &&
580 		    (ntohl(to.sin_addr.s_addr) & 0xff000000U) != 0x7f000000U)
581 			errx(1, "source is on 127/8, destination is not");
582 
583 		if (getuid() &&
584 		    bind(sndsock, (struct sockaddr *)&from, sizeof(from)) < 0)
585 			err(1, "bind");
586 	}
587 
588 	fprintf(stderr, "traceroute to %s (%s)", hostname,
589 		inet_ntoa(to.sin_addr));
590 	if (source)
591 		fprintf(stderr, " from %s", source);
592 	fprintf(stderr, ", %u hops max, %d byte packets\n", max_ttl, datalen);
593 	(void) fflush(stderr);
594 
595 	if (first_ttl > 1)
596 		printf("Skipping %u intermediate hops\n", first_ttl - 1);
597 
598 	for (ttl = first_ttl; ttl && ttl <= max_ttl; ++ttl) {
599 		int got_there = 0, unreachable = 0, timeout = 0, loss;
600 		in_addr_t lastaddr = 0;
601 		quad_t dt;
602 
603 		printf("%2u ", ttl);
604 		for (probe = 0, loss = 0; probe < nprobes; ++probe) {
605 			int cc;
606 			struct timeval t1, t2;
607 			int code;
608 
609 			(void) gettimeofday(&t1, NULL);
610 			send_probe(++seq, ttl, incflag, &to);
611 			while ((cc = wait_for_reply(s, &from, &t1))) {
612 				(void) gettimeofday(&t2, NULL);
613 				if (t2.tv_sec - t1.tv_sec > waittime) {
614 					cc = 0;
615 					break;
616 				}
617 				i = packet_ok(packet, cc, &from, seq, incflag);
618 				/* Skip short packet */
619 				if (i == 0)
620 					continue;
621 				if (from.sin_addr.s_addr != lastaddr) {
622 					print(packet, cc, &from);
623 					lastaddr = from.sin_addr.s_addr;
624 				}
625 				dt = (quad_t)(t2.tv_sec - t1.tv_sec) * 1000000 +
626 				    (quad_t)(t2.tv_usec - t1.tv_usec);
627 				printf("  %u", (u_int)(dt / 1000));
628 				if (dt % 1000)
629 					printf(".%u", (u_int)(dt % 1000));
630 				printf(" ms");
631 				ip = (struct ip *)packet;
632 				if (ttl_flag)
633 					printf(" (%u)", ip->ip_ttl);
634 				if (i == -2) {
635 #ifndef ARCHAIC
636 					ip = (struct ip *)packet;
637 					if (ip->ip_ttl <= 1)
638 						printf(" !");
639 #endif
640 					++got_there;
641 					break;
642 				}
643 
644 				icp = (struct icmp *) (((u_char *)ip)+(ip->ip_hl<<2));
645 				inner_ip = (struct ip *) (((u_char *)icp)+8);
646 
647 				tos_returned = inner_ip->ip_tos;
648 
649 				if (tflag && (tos_returned != last_tos))
650 					printf (" (TOS=%d!)", tos_returned);
651 
652 				last_tos = tos_returned;
653 
654 				/* time exceeded in transit */
655 				if (i == -1)
656 					break;
657 				code = i - 1;
658 				switch (code) {
659 				case ICMP_UNREACH_PORT:
660 #ifndef ARCHAIC
661 					ip = (struct ip *)packet;
662 					if (ip->ip_ttl <= 1)
663 						printf(" !");
664 #endif /* ARCHAIC */
665 					++got_there;
666 					break;
667 				case ICMP_UNREACH_NET:
668 					++unreachable;
669 					printf(" !N");
670 					break;
671 				case ICMP_UNREACH_HOST:
672 					++unreachable;
673 					printf(" !H");
674 					break;
675 				case ICMP_UNREACH_PROTOCOL:
676 					++got_there;
677 					printf(" !P");
678 					break;
679 				case ICMP_UNREACH_NEEDFRAG:
680 					++unreachable;
681 					printf(" !F");
682 					break;
683 				case ICMP_UNREACH_SRCFAIL:
684 					++unreachable;
685 					printf(" !S");
686 					break;
687 				case ICMP_UNREACH_FILTER_PROHIB:
688 					++unreachable;
689 					printf(" !X");
690 					break;
691 				case ICMP_UNREACH_NET_PROHIB: /*misuse*/
692 					++unreachable;
693 					printf(" !A");
694 					break;
695 				case ICMP_UNREACH_HOST_PROHIB:
696 					++unreachable;
697 					printf(" !C");
698 					break;
699 				case ICMP_UNREACH_NET_UNKNOWN:
700 				case ICMP_UNREACH_HOST_UNKNOWN:
701 					++unreachable;
702 					printf(" !U");
703 					break;
704 				case ICMP_UNREACH_ISOLATED:
705 					++unreachable;
706 					printf(" !I");
707 					break;
708 				case ICMP_UNREACH_TOSNET:
709 				case ICMP_UNREACH_TOSHOST:
710 					++unreachable;
711 					printf(" !T");
712 					break;
713 				default:
714 					++unreachable;
715 					printf(" !<%d>", i - 1);
716 					break;
717 				}
718 				break;
719 			}
720 			if (cc == 0) {
721 				printf(" *");
722 				timeout++;
723 				loss++;
724 			} else if (cc && probe == nprobes - 1 &&
725 			    (xflag || verbose))
726 				print_exthdr(packet, cc);
727 			(void) fflush(stdout);
728 		}
729 		if (sump)
730 			printf(" (%d%% loss)", (loss * 100) / nprobes);
731 		putchar('\n');
732 		if (got_there || (unreachable && (unreachable + timeout) >= nprobes))
733 			break;
734 	}
735 	exit(0);
736 }
737 
738 void
739 print_exthdr(u_char *buf, int cc)
740 {
741 	struct icmp_ext_hdr exthdr;
742 	struct icmp_ext_obj_hdr objhdr;
743 	struct ip *ip;
744 	struct icmp *icp;
745 	int hlen, first;
746 	u_int32_t label;
747 	u_int16_t off, olen;
748 	u_int8_t type;
749 
750 	ip = (struct ip *)buf;
751 	hlen = ip->ip_hl << 2;
752 	if (cc < hlen + ICMP_MINLEN)
753 		return;
754 	icp = (struct icmp *)(buf + hlen);
755 	cc -= hlen + ICMP_MINLEN;
756 	buf += hlen + ICMP_MINLEN;
757 
758 	type = icp->icmp_type;
759 	if (type != ICMP_TIMXCEED && type != ICMP_UNREACH &&
760 	    type != ICMP_PARAMPROB)
761 		/* Wrong ICMP type for extension */
762 		return;
763 
764 	off = icp->icmp_length * sizeof(u_int32_t);
765 	if (off == 0)
766 		/*
767 		 * rfc 4884 Section 5.5: traceroute MUST try to parse
768 		 * broken ext headers. Again IETF bent over to please
769 		 * idotic corporations.
770 		 */
771 		off = ICMP_EXT_OFFSET;
772 	else if (off < ICMP_EXT_OFFSET)
773 		/* rfc 4884 requires an offset of at least 128 bytes */
774 		return;
775 
776 	/* make sure that at least one extension is present */
777 	if (cc < off + sizeof(exthdr) + sizeof(objhdr))
778 		/* Not enough space for ICMP extensions */
779 		return;
780 
781 	cc -= off;
782 	buf += off;
783 	memcpy(&exthdr, buf, sizeof(exthdr));
784 
785 	/* verify version */
786 	if ((exthdr.ieh_version & ICMP_EXT_HDR_VMASK) != ICMP_EXT_HDR_VERSION)
787 		return;
788 
789 	/* verify checksum */
790 	if (exthdr.ieh_cksum && in_cksum((u_short *)buf, cc))
791 		return;
792 
793 	buf += sizeof(exthdr);
794 	cc -= sizeof(exthdr);
795 
796 	while (cc > sizeof(objhdr)) {
797 		memcpy(&objhdr, buf, sizeof(objhdr));
798 		olen = ntohs(objhdr.ieo_length);
799 
800 		/* Sanity check the length field */
801 		if (olen < sizeof(objhdr) || olen > cc)
802 			return;
803 
804 		cc -= olen;
805 
806 		/* Move past the object header */
807 		buf += sizeof(objhdr);
808 		olen -= sizeof(objhdr);
809 
810 		switch (objhdr.ieo_cnum) {
811 		case ICMP_EXT_MPLS:
812 			/* RFC 4950: ICMP Extensions for MPLS */
813 			switch (objhdr.ieo_ctype) {
814 			case 1:
815 				first = 0;
816 				while (olen >= sizeof(u_int32_t)) {
817 					memcpy(&label, buf, sizeof(u_int32_t));
818 					label = htonl(label);
819 					buf += sizeof(u_int32_t);
820 					olen -= sizeof(u_int32_t);
821 
822 					if (first == 0) {
823 						printf(" [MPLS Label ");
824 						first++;
825 					} else
826 						printf(", ");
827 					printf("%d", MPLS_LABEL(label));
828 					if (MPLS_EXP(label))
829 						printf(" (Exp %x)",
830 						    MPLS_EXP(label));
831 				}
832 				if (olen > 0) {
833 					printf("|]");
834 					return;
835 				}
836 				if (first != 0)
837 					printf("]");
838 				break;
839 			default:
840 				buf += olen;
841 				break;
842 			}
843 			break;
844 		case ICMP_EXT_IFINFO:
845 		default:
846 			buf += olen;
847 			break;
848 		}
849 	}
850 }
851 
852 int
853 wait_for_reply(int sock, struct sockaddr_in *from, struct timeval *sent)
854 {
855 	socklen_t fromlen = sizeof (*from);
856 	struct timeval now, wait;
857 	int cc = 0, fdsn;
858 	fd_set *fdsp;
859 
860 	fdsn = howmany(sock+1, NFDBITS) * sizeof(fd_mask);
861 	if ((fdsp = (fd_set *)malloc(fdsn)) == NULL)
862 		err(1, "malloc");
863 	memset(fdsp, 0, fdsn);
864 	FD_SET(sock, fdsp);
865 	gettimeofday(&now, NULL);
866 	wait.tv_sec = (sent->tv_sec + waittime) - now.tv_sec;
867 	wait.tv_usec =  sent->tv_usec - now.tv_usec;
868 	if (wait.tv_usec < 0) {
869 		wait.tv_usec += 1000000;
870 		wait.tv_sec--;
871 	}
872 	if (wait.tv_sec < 0)
873 		timerclear(&wait);
874 
875 	if (select(sock+1, fdsp, (fd_set *)0, (fd_set *)0, &wait) > 0)
876 		cc = recvfrom(s, (char *)packet, sizeof(packet), 0,
877 		    (struct sockaddr *)from, &fromlen);
878 
879 	free(fdsp);
880 	return (cc);
881 }
882 
883 void
884 dump_packet(void)
885 {
886 	u_char *p;
887 	int i;
888 
889 	fprintf(stderr, "packet data:");
890 	for (p = outpacket, i = 0; i < datalen; i++) {
891 		if ((i % 24) == 0)
892 			fprintf(stderr, "\n ");
893 		fprintf(stderr, " %02x", *p++);
894 	}
895 	fprintf(stderr, "\n");
896 }
897 
898 void
899 send_probe(int seq, u_int8_t ttl, int iflag, struct sockaddr_in *to)
900 {
901 	struct ip *ip = (struct ip *)outpacket;
902 	u_char *p = (u_char *)(ip + 1);
903 	struct udphdr *up = (struct udphdr *)(p + lsrrlen);
904 	struct icmp *icmpp = (struct icmp *)(p + lsrrlen);
905 	struct packetdata *op;
906 	struct timeval tv;
907 	int i;
908 
909 	ip->ip_len = htons(datalen);
910 	ip->ip_ttl = ttl;
911 	ip->ip_id = htons(ident+seq);
912 
913 	switch (proto) {
914 	case IPPROTO_ICMP:
915 		icmpp->icmp_type = icmp_type;
916 		icmpp->icmp_code = icmp_code;
917 		icmpp->icmp_seq = htons(seq);
918 		icmpp->icmp_id = htons(ident);
919 		op = (struct packetdata *)(icmpp + 1);
920 		break;
921 	case IPPROTO_UDP:
922 		up->uh_sport = htons(ident);
923 		if (iflag)
924 			up->uh_dport = htons(port+seq);
925 		else
926 			up->uh_dport = htons(port);
927 		up->uh_ulen = htons((u_short)(datalen - sizeof(struct ip) -
928 		    lsrrlen));
929 		up->uh_sum = 0;
930 		op = (struct packetdata *)(up + 1);
931 		break;
932 	default:
933 		op = (struct packetdata *)(ip + 1);
934 		break;
935 	}
936 	op->seq = seq;
937 	op->ttl = ttl;
938 	(void) gettimeofday(&tv, NULL);
939 
940 	/*
941 	 * We don't want hostiles snooping the net to get any useful
942 	 * information about us. Send the timestamp in network byte order,
943 	 * and perturb the timestamp enough that they won't know our
944 	 * real clock ticker. We don't want to perturb the time by too
945 	 * much: being off by a suspiciously large amount might indicate
946 	 * OpenBSD.
947 	 *
948 	 * The timestamps in the packet are currently unused. If future
949 	 * work wants to use them they will have to subtract out the
950 	 * perturbation first.
951 	 */
952 	(void) gettimeofday(&tv, NULL);
953 	op->sec = htonl(tv.tv_sec + sec_perturb);
954 	op->usec = htonl((tv.tv_usec + usec_perturb) % 1000000);
955 
956 	if (proto == IPPROTO_ICMP && icmp_type == ICMP_ECHO) {
957 		icmpp->icmp_cksum = 0;
958 		icmpp->icmp_cksum = in_cksum((u_short *)icmpp,
959 		    datalen - sizeof(struct ip) - lsrrlen);
960 		if (icmpp->icmp_cksum == 0)
961 			icmpp->icmp_cksum = 0xffff;
962 	}
963 
964 	if (dump)
965 		dump_packet();
966 
967 	i = sendto(sndsock, outpacket, datalen, 0, (struct sockaddr *)to,
968 	    sizeof(struct sockaddr_in));
969 	if (i < 0 || i != datalen)  {
970 		if (i < 0)
971 			perror("sendto");
972 		printf("traceroute: wrote %s %d chars, ret=%d\n", hostname,
973 		    datalen, i);
974 		(void) fflush(stdout);
975 	}
976 }
977 
978 static char *ttab[] = {
979 	"Echo Reply",
980 	"ICMP 1",
981 	"ICMP 2",
982 	"Dest Unreachable",
983 	"Source Quench",
984 	"Redirect",
985 	"ICMP 6",
986 	"ICMP 7",
987 	"Echo",
988 	"Router Advert",
989 	"Router Solicit",
990 	"Time Exceeded",
991 	"Param Problem",
992 	"Timestamp",
993 	"Timestamp Reply",
994 	"Info Request",
995 	"Info Reply",
996 	"Mask Request",
997 	"Mask Reply"
998 };
999 
1000 /*
1001  * Convert an ICMP "type" field to a printable string.
1002  */
1003 char *
1004 pr_type(u_int8_t t)
1005 {
1006 	if (t > 18)
1007 		return ("OUT-OF-RANGE");
1008 	return (ttab[t]);
1009 }
1010 
1011 int
1012 packet_ok(u_char *buf, int cc, struct sockaddr_in *from, int seq, int iflag)
1013 {
1014 	struct icmp *icp;
1015 	u_char code;
1016 	u_int8_t type;
1017 	int hlen;
1018 #ifndef ARCHAIC
1019 	struct ip *ip;
1020 
1021 	ip = (struct ip *) buf;
1022 	hlen = ip->ip_hl << 2;
1023 	if (cc < hlen + ICMP_MINLEN) {
1024 		if (verbose)
1025 			printf("packet too short (%d bytes) from %s\n", cc,
1026 			    inet_ntoa(from->sin_addr));
1027 		return (0);
1028 	}
1029 	cc -= hlen;
1030 	icp = (struct icmp *)(buf + hlen);
1031 #else
1032 	icp = (struct icmp *)buf;
1033 #endif /* ARCHAIC */
1034 	type = icp->icmp_type;
1035 	code = icp->icmp_code;
1036 	if ((type == ICMP_TIMXCEED && code == ICMP_TIMXCEED_INTRANS) ||
1037 	    type == ICMP_UNREACH || type == ICMP_ECHOREPLY) {
1038 		struct ip *hip;
1039 		struct udphdr *up;
1040 		struct icmp *icmpp;
1041 
1042 		hip = &icp->icmp_ip;
1043 		hlen = hip->ip_hl << 2;
1044 
1045 		switch (proto) {
1046 		case IPPROTO_ICMP:
1047 			if (icmp_type == ICMP_ECHO &&
1048 			    type == ICMP_ECHOREPLY &&
1049 			    icp->icmp_id == htons(ident) &&
1050 			    icp->icmp_seq == htons(seq))
1051 				return (-2); /* we got there */
1052 
1053 			icmpp = (struct icmp *)((u_char *)hip + hlen);
1054 			if (hlen + 8 <= cc && hip->ip_p == IPPROTO_ICMP &&
1055 			    icmpp->icmp_id == htons(ident) &&
1056 			    icmpp->icmp_seq == htons(seq))
1057 				return (type == ICMP_TIMXCEED? -1 : code + 1);
1058 			break;
1059 
1060 		case IPPROTO_UDP:
1061 			up = (struct udphdr *)((u_char *)hip + hlen);
1062 			if (hlen + 12 <= cc && hip->ip_p == proto &&
1063 			    up->uh_sport == htons(ident) &&
1064 			    ((iflag && up->uh_dport == htons(port + seq)) ||
1065 			    (!iflag && up->uh_dport == htons(port))))
1066 				return (type == ICMP_TIMXCEED? -1 : code + 1);
1067 			break;
1068 		default:
1069 			/* this is some odd, user specified proto,
1070 			 * how do we check it?
1071 			 */
1072 			if (hip->ip_p == proto)
1073 				return (type == ICMP_TIMXCEED? -1 : code + 1);
1074 		}
1075 	}
1076 #ifndef ARCHAIC
1077 	if (verbose) {
1078 		int i;
1079 		in_addr_t *lp = (in_addr_t *)&icp->icmp_ip;
1080 
1081 		printf("\n%d bytes from %s", cc, inet_ntoa(from->sin_addr));
1082 		printf(" to %s", inet_ntoa(ip->ip_dst));
1083 		printf(": icmp type %u (%s) code %d\n", type, pr_type(type),
1084 		    icp->icmp_code);
1085 		for (i = 4; i < cc ; i += sizeof(in_addr_t))
1086 			printf("%2d: x%8.8lx\n", i, (unsigned long)*lp++);
1087 	}
1088 #endif /* ARCHAIC */
1089 	return (0);
1090 }
1091 
1092 void
1093 print(u_char *buf, int cc, struct sockaddr_in *from)
1094 {
1095 	struct ip *ip;
1096 	int hlen;
1097 
1098 	ip = (struct ip *) buf;
1099 	hlen = ip->ip_hl << 2;
1100 	cc -= hlen;
1101 
1102 	if (nflag)
1103 		printf(" %s", inet_ntoa(from->sin_addr));
1104 	else
1105 		printf(" %s (%s)", inetname(from->sin_addr),
1106 		    inet_ntoa(from->sin_addr));
1107 
1108 	if (verbose)
1109 		printf(" %d bytes to %s", cc, inet_ntoa(ip->ip_dst));
1110 }
1111 
1112 
1113 /*
1114  * Checksum routine for Internet Protocol family headers (C Version)
1115  */
1116 u_short
1117 in_cksum(u_short *addr, int len)
1118 {
1119 	u_short *w = addr, answer;
1120 	int nleft = len, sum = 0;
1121 
1122 	/*
1123 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
1124 	 *  we add sequential 16 bit words to it, and at the end, fold
1125 	 *  back all the carry bits from the top 16 bits into the lower
1126 	 *  16 bits.
1127 	 */
1128 	while (nleft > 1)  {
1129 		sum += *w++;
1130 		nleft -= 2;
1131 	}
1132 
1133 	/* mop up an odd byte, if necessary */
1134 	if (nleft == 1)
1135 		sum += *(u_char *)w;
1136 
1137 	/*
1138 	 * add back carry outs from top 16 bits to low 16 bits
1139 	 */
1140 	sum = (sum >> 16) + (sum & 0xffff);	/* add hi 16 to low 16 */
1141 	sum += (sum >> 16);			/* add carry */
1142 	answer = ~sum;				/* truncate to 16 bits */
1143 	return (answer);
1144 }
1145 
1146 /*
1147  * Construct an Internet address representation.
1148  */
1149 char *
1150 inetname(struct in_addr in)
1151 {
1152 	static char domain[MAXHOSTNAMELEN], line[MAXHOSTNAMELEN];
1153 	static int first = 1;
1154 	struct hostent *hp;
1155 	char *cp;
1156 
1157 	if (first) {
1158 		first = 0;
1159 		if (gethostname(domain, sizeof domain) == 0 &&
1160 		    (cp = strchr(domain, '.')) != NULL) {
1161 			strlcpy(domain, cp + 1, sizeof(domain));
1162 		}
1163 	}
1164 	if (in.s_addr != INADDR_ANY) {
1165 		hp = gethostbyaddr((char *)&in, sizeof(in), AF_INET);
1166 		if (hp != NULL) {
1167 			if ((cp = strchr(hp->h_name, '.')) != NULL &&
1168 			    strcmp(cp + 1, domain) == 0)
1169 				*cp = '\0';
1170 			strlcpy(line, hp->h_name, sizeof(line));
1171 			return (line);
1172 		}
1173 	}
1174 	return (inet_ntoa(in));
1175 }
1176 
1177 void
1178 usage(void)
1179 {
1180 	extern char *__progname;
1181 
1182 	fprintf(stderr,
1183 	    "usage: %s [-cDdIlnrSvx] [-f first_ttl] [-g gateway_addr] [-m max_ttl]\n"
1184 	    "\t[-P proto] [-p port] [-q nqueries] [-s src_addr] [-t tos]\n"
1185 	    "\t[-V rtable] [-w waittime] host [packetsize]\n", __progname);
1186 	exit(1);
1187 }
1188