1 /*	$OpenBSD: traceroute.c,v 1.61 2004/01/26 18:23:51 deraadt Exp $	*/
2 /*	$NetBSD: traceroute.c,v 1.10 1995/05/21 15:50:45 mycroft Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Van Jacobson.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $DragonFly: src/usr.sbin/traceroute/traceroute.c,v 1.3 2004/08/27 21:27:41 asmodai Exp $
36  */
37 
38 #ifndef lint
39 static char copyright[] =
40 "@(#) Copyright (c) 1990, 1993\n\
41 	The Regents of the University of California.  All rights reserved.\n";
42 #endif /* not lint */
43 
44 #ifndef lint
45 #if 0
46 static char sccsid[] = "@(#)traceroute.c	8.1 (Berkeley) 6/6/93";*/
47 #else
48 static char rcsid[] = "$OpenBSD: traceroute.c,v 1.61 2004/01/26 18:23:51 deraadt Exp $";
49 #endif
50 #endif /* not lint */
51 
52 /*
53  * traceroute host  - trace the route ip packets follow going to "host".
54  *
55  * Attempt to trace the route an ip packet would follow to some
56  * internet host.  We find out intermediate hops by launching probe
57  * packets with a small ttl (time to live) then listening for an
58  * icmp "time exceeded" reply from a gateway.  We start our probes
59  * with a ttl of one and increase by one until we get an icmp "port
60  * unreachable" (which means we got to "host") or hit a max (which
61  * defaults to 64 hops & can be changed with the -m flag).  Three
62  * probes (change with -q flag) are sent at each ttl setting and a
63  * line is printed showing the ttl, address of the gateway and
64  * round trip time of each probe.  If the probe answers come from
65  * different gateways, the address of each responding system will
66  * be printed.  If there is no response within a 5 sec. timeout
67  * interval (changed with the -w flag), a "*" is printed for that
68  * probe.
69  *
70  * Probe packets are UDP format.  We don't want the destination
71  * host to process them so the destination port is set to an
72  * unlikely value (if some clod on the destination is using that
73  * value, it can be changed with the -p flag).
74  *
75  * A sample use might be:
76  *
77  *     [yak 71]% traceroute nis.nsf.net.
78  *     traceroute to nis.nsf.net (35.1.1.48), 64 hops max, 56 byte packet
79  *      1  helios.ee.lbl.gov (128.3.112.1)  19 ms  19 ms  0 ms
80  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
81  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
82  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  39 ms
83  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  39 ms  39 ms  39 ms
84  *      6  128.32.197.4 (128.32.197.4)  40 ms  59 ms  59 ms
85  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  59 ms
86  *      8  129.140.70.13 (129.140.70.13)  99 ms  99 ms  80 ms
87  *      9  129.140.71.6 (129.140.71.6)  139 ms  239 ms  319 ms
88  *     10  129.140.81.7 (129.140.81.7)  220 ms  199 ms  199 ms
89  *     11  nic.merit.edu (35.1.1.48)  239 ms  239 ms  239 ms
90  *
91  * Note that lines 2 & 3 are the same.  This is due to a buggy
92  * kernel on the 2nd hop system -- lbl-csam.arpa -- that forwards
93  * packets with a zero ttl.
94  *
95  * A more interesting example is:
96  *
97  *     [yak 72]% traceroute allspice.lcs.mit.edu.
98  *     traceroute to allspice.lcs.mit.edu (18.26.0.115), 64 hops max
99  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
100  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  19 ms  19 ms
101  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  19 ms
102  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  19 ms  39 ms  39 ms
103  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  20 ms  39 ms  39 ms
104  *      6  128.32.197.4 (128.32.197.4)  59 ms  119 ms  39 ms
105  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  39 ms
106  *      8  129.140.70.13 (129.140.70.13)  80 ms  79 ms  99 ms
107  *      9  129.140.71.6 (129.140.71.6)  139 ms  139 ms  159 ms
108  *     10  129.140.81.7 (129.140.81.7)  199 ms  180 ms  300 ms
109  *     11  129.140.72.17 (129.140.72.17)  300 ms  239 ms  239 ms
110  *     12  * * *
111  *     13  128.121.54.72 (128.121.54.72)  259 ms  499 ms  279 ms
112  *     14  * * *
113  *     15  * * *
114  *     16  * * *
115  *     17  * * *
116  *     18  ALLSPICE.LCS.MIT.EDU (18.26.0.115)  339 ms  279 ms  279 ms
117  *
118  * (I start to see why I'm having so much trouble with mail to
119  * MIT.)  Note that the gateways 12, 14, 15, 16 & 17 hops away
120  * either don't send ICMP "time exceeded" messages or send them
121  * with a ttl too small to reach us.  14 - 17 are running the
122  * MIT C Gateway code that doesn't send "time exceeded"s.  God
123  * only knows what's going on with 12.
124  *
125  * The silent gateway 12 in the above may be the result of a bug in
126  * the 4.[23]BSD network code (and its derivatives):  4.x (x <= 3)
127  * sends an unreachable message using whatever ttl remains in the
128  * original datagram.  Since, for gateways, the remaining ttl is
129  * zero, the icmp "time exceeded" is guaranteed to not make it back
130  * to us.  The behavior of this bug is slightly more interesting
131  * when it appears on the destination system:
132  *
133  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
134  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  39 ms
135  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  39 ms  19 ms
136  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  19 ms
137  *      5  ccn-nerif35.Berkeley.EDU (128.32.168.35)  39 ms  39 ms  39 ms
138  *      6  csgw.Berkeley.EDU (128.32.133.254)  39 ms  59 ms  39 ms
139  *      7  * * *
140  *      8  * * *
141  *      9  * * *
142  *     10  * * *
143  *     11  * * *
144  *     12  * * *
145  *     13  rip.Berkeley.EDU (128.32.131.22)  59 ms !  39 ms !  39 ms !
146  *
147  * Notice that there are 12 "gateways" (13 is the final
148  * destination) and exactly the last half of them are "missing".
149  * What's really happening is that rip (a Sun-3 running Sun OS3.5)
150  * is using the ttl from our arriving datagram as the ttl in its
151  * icmp reply.  So, the reply will time out on the return path
152  * (with no notice sent to anyone since icmp's aren't sent for
153  * icmp's) until we probe with a ttl that's at least twice the path
154  * length.  I.e., rip is really only 7 hops away.  A reply that
155  * returns with a ttl of 1 is a clue this problem exists.
156  * Traceroute prints a "!" after the time if the ttl is <= 1.
157  * Since vendors ship a lot of obsolete (DEC's Ultrix, Sun 3.x) or
158  * non-standard (HPUX) software, expect to see this problem
159  * frequently and/or take care picking the target host of your
160  * probes.
161  *
162  * Other possible annotations after the time are !H, !N, !P (got a host,
163  * network or protocol unreachable, respectively), !S or !F (source
164  * route failed or fragmentation needed -- neither of these should
165  * ever occur and the associated gateway is busted if you see one).  If
166  * almost all the probes result in some kind of unreachable, traceroute
167  * will give up and exit.
168  *
169  * Notes
170  * -----
171  * This program must be run by root or be setuid.  (I suggest that
172  * you *don't* make it setuid -- casual use could result in a lot
173  * of unnecessary traffic on our poor, congested nets.)
174  *
175  * This program requires a kernel mod that does not appear in any
176  * system available from Berkeley:  A raw ip socket using proto
177  * IPPROTO_RAW must interpret the data sent as an ip datagram (as
178  * opposed to data to be wrapped in a ip datagram).  See the README
179  * file that came with the source to this program for a description
180  * of the mods I made to /sys/netinet/raw_ip.c.  Your mileage may
181  * vary.  But, again, ANY 4.x (x < 4) BSD KERNEL WILL HAVE TO BE
182  * MODIFIED TO RUN THIS PROGRAM.
183  *
184  * The udp port usage may appear bizarre (well, ok, it is bizarre).
185  * The problem is that an icmp message only contains 8 bytes of
186  * data from the original datagram.  8 bytes is the size of a udp
187  * header so, if we want to associate replies with the original
188  * datagram, the necessary information must be encoded into the
189  * udp header (the ip id could be used but there's no way to
190  * interlock with the kernel's assignment of ip id's and, anyway,
191  * it would have taken a lot more kernel hacking to allow this
192  * code to set the ip id).  So, to allow two or more users to
193  * use traceroute simultaneously, we use this task's pid as the
194  * source port (the high bit is set to move the port number out
195  * of the "likely" range).  To keep track of which probe is being
196  * replied to (so times and/or hop counts don't get confused by a
197  * reply that was delayed in transit), we increment the destination
198  * port number before each probe.
199  *
200  * Don't use this as a coding example.  I was trying to find a
201  * routing problem and this code sort-of popped out after 48 hours
202  * without sleep.  I was amazed it ever compiled, much less ran.
203  *
204  * I stole the idea for this program from Steve Deering.  Since
205  * the first release, I've learned that had I attended the right
206  * IETF working group meetings, I also could have stolen it from Guy
207  * Almes or Matt Mathis.  I don't know (or care) who came up with
208  * the idea first.  I envy the originators' perspicacity and I'm
209  * glad they didn't keep the idea a secret.
210  *
211  * Tim Seaver, Ken Adelman and C. Philip Wood provided bug fixes and/or
212  * enhancements to the original distribution.
213  *
214  * I've hacked up a round-trip-route version of this that works by
215  * sending a loose-source-routed udp datagram through the destination
216  * back to yourself.  Unfortunately, SO many gateways botch source
217  * routing, the thing is almost worthless.  Maybe one day...
218  *
219  *  -- Van Jacobson (van@helios.ee.lbl.gov)
220  *     Tue Dec 20 03:50:13 PST 1988
221  */
222 
223 #include <sys/param.h>
224 #include <sys/time.h>
225 #include <sys/socket.h>
226 #include <sys/file.h>
227 #include <sys/ioctl.h>
228 #include <sys/sysctl.h>
229 
230 #include <netinet/in_systm.h>
231 #include <netinet/in.h>
232 #include <netinet/ip.h>
233 #include <netinet/ip_icmp.h>
234 #include <netinet/ip_var.h>
235 #include <netinet/udp.h>
236 
237 #include <arpa/inet.h>
238 
239 #include <ctype.h>
240 #include <err.h>
241 #include <errno.h>
242 #include <netdb.h>
243 #include <stdio.h>
244 #include <stdlib.h>
245 #include <string.h>
246 #include <unistd.h>
247 
248 #define	MAX_LSRR	((MAX_IPOPTLEN - 4) / 4)
249 
250 /*
251  * Format of the data in a (udp) probe packet.
252  */
253 struct packetdata {
254 	u_char seq;		/* sequence number of this packet */
255 	u_int8_t ttl;		/* ttl packet left with */
256 	u_int32_t sec;		/* time packet left */
257 	u_int32_t usec;
258 };
259 
260 struct in_addr gateway[MAX_LSRR + 1];
261 int lsrrlen = 0;
262 int32_t sec_perturb;
263 int32_t usec_perturb;
264 
265 u_char packet[512], *outpacket;	/* last inbound (icmp) packet */
266 
267 void dump_packet(void);
268 int wait_for_reply(int, struct sockaddr_in *, struct timeval *);
269 void send_probe(int, u_int8_t, int, struct sockaddr_in *);
270 int packet_ok(u_char *, int, struct sockaddr_in *, int, int);
271 char *pr_type(u_int8_t);
272 void print(u_char *, int, struct sockaddr_in *);
273 char *inetname(struct in_addr);
274 u_short in_cksum(u_short *, int);
275 void usage(void);
276 
277 int s;				/* receive (icmp) socket file descriptor */
278 int sndsock;			/* send (udp) socket file descriptor */
279 
280 int datalen;			/* How much data */
281 int headerlen;			/* How long packet's header is */
282 
283 char *source = 0;
284 char *hostname;
285 
286 int nprobes = 3;
287 u_int8_t max_ttl = IPDEFTTL;
288 u_int8_t first_ttl = 1;
289 u_short ident;
290 u_short port = 32768+666;	/* start udp dest port # for probe packets */
291 u_char	proto = IPPROTO_UDP;
292 u_int8_t  icmp_type = ICMP_ECHO; /* default ICMP code/type */
293 u_char  icmp_code = 0;
294 int options;			/* socket options */
295 int verbose;
296 int waittime = 5;		/* time to wait for response (in seconds) */
297 int nflag;			/* print addresses numerically */
298 int dump;
299 
300 int
301 main(int argc, char *argv[])
302 {
303 	int mib[4] = { CTL_NET, PF_INET, IPPROTO_IP, IPCTL_DEFTTL };
304 	int ttl_flag = 0, incflag = 1, protoset = 0, sump = 0;
305 	int ch, i, lsrr = 0, on = 1, probe, seq = 0, tos = 0;
306 	size_t size = sizeof(max_ttl);
307 	struct sockaddr_in from, to;
308 	struct hostent *hp;
309 	u_int32_t tmprnd;
310 	struct ip *ip;
311 	u_int8_t ttl;
312 	char *ep;
313 	long l;
314 
315 	if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0)
316 		err(5, "icmp socket");
317 	if ((sndsock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0)
318 		err(5, "raw socket");
319 
320 	/* revoke privs */
321 	seteuid(getuid());
322 	setuid(getuid());
323 
324 	(void) sysctl(mib, sizeof(mib)/sizeof(mib[0]), &max_ttl, &size,
325 	    NULL, 0);
326 
327 	while ((ch = getopt(argc, argv, "SDIdg:f:m:np:q:rs:t:w:vlP:c")) != -1)
328 		switch (ch) {
329 		case 'S':
330 			sump = 1;
331 			break;
332 		case 'f':
333 			errno = 0;
334 			ep = NULL;
335 			l = strtol(optarg, &ep, 10);
336 			if (errno || !*optarg || *ep || l < 1 || l > max_ttl)
337 				errx(1, "min ttl must be 1 to %u.", max_ttl);
338 			first_ttl = (u_int8_t)l;
339 			break;
340 		case 'c':
341 			incflag = 0;
342 			break;
343 		case 'd':
344 			options |= SO_DEBUG;
345 			break;
346 		case 'D':
347 			dump = 1;
348 			break;
349 		case 'g':
350 			if (lsrr >= MAX_LSRR)
351 				errx(1, "too many gateways; max %d", MAX_LSRR);
352 			if (inet_aton(optarg, &gateway[lsrr]) == 0) {
353 				hp = gethostbyname(optarg);
354 				if (hp == 0)
355 					errx(1, "unknown host %s", optarg);
356 				memcpy(&gateway[lsrr], hp->h_addr, hp->h_length);
357 			}
358 			if (++lsrr == 1)
359 				lsrrlen = 4;
360 			lsrrlen += 4;
361 			break;
362 		case 'I':
363 			if (protoset)
364 				errx(1, "protocol already set with -P");
365 			protoset = 1;
366 			proto = IPPROTO_ICMP;
367 			break;
368 		case 'l':
369 			ttl_flag++;
370 			break;
371 		case 'm':
372 			errno = 0;
373 			ep = NULL;
374 			l = strtol(optarg, &ep, 10);
375 			if (errno || !*optarg || *ep || l < first_ttl ||
376 			    l > MAXTTL)
377 				errx(1, "max ttl must be %u to %u.", first_ttl,
378 				    MAXTTL);
379 			max_ttl = (u_int8_t)l;
380 			break;
381 		case 'n':
382 			nflag++;
383 			break;
384 		case 'p':
385 			errno = 0;
386 			ep = NULL;
387 			l = strtol(optarg, &ep, 10);
388 			if (errno || !*optarg || *ep || l <= 0 || l >= 65536)
389 				errx(1, "port must be >0, <65536.");
390 			port = (int)l;
391 			break;
392 		case 'P':
393 			if (protoset)
394 				errx(1, "protocol already set with -I");
395 			protoset = 1;
396 			errno = 0;
397 			ep = NULL;
398 			l = strtol(optarg, &ep, 10);
399 			if (errno || !*optarg || *ep || l < 1 ||
400 			    l >= IPPROTO_MAX) {
401 				struct protoent *pent;
402 
403 				pent = getprotobyname(optarg);
404 				if (pent)
405 					proto = pent->p_proto;
406 				else
407 					errx(1, "proto must be >=1, or a name.");
408 			} else
409 				proto = (int)l;
410 			break;
411 		case 'q':
412 			errno = 0;
413 			ep = NULL;
414 			l = strtol(optarg, &ep, 10);
415 			if (errno || !*optarg || *ep || l < 1 || l > INT_MAX)
416 				errx(1, "nprobes must be >0.");
417 			nprobes = (int)l;
418 			break;
419 		case 'r':
420 			options |= SO_DONTROUTE;
421 			break;
422 		case 's':
423 			/*
424 			 * set the ip source address of the outbound
425 			 * probe (e.g., on a multi-homed host).
426 			 */
427 			source = optarg;
428 			break;
429 		case 't':
430 			errno = 0;
431 			ep = NULL;
432 			l = strtol(optarg, &ep, 10);
433 			if (errno || !*optarg || *ep || l < 0 || l > 255)
434 				errx(1, "tos must be 0 to 255.");
435 			tos = (int)l;
436 			break;
437 		case 'v':
438 			verbose++;
439 			break;
440 		case 'w':
441 			errno = 0;
442 			ep = NULL;
443 			l = strtol(optarg, &ep, 10);
444 			if (errno || !*optarg || *ep || l <= 1 || l > INT_MAX)
445 				errx(1, "wait must be >1 sec.");
446 			waittime = (int)l;
447 			break;
448 		default:
449 			usage();
450 		}
451 	argc -= optind;
452 	argv += optind;
453 
454 	if (argc < 1)
455 		usage();
456 
457 	setlinebuf (stdout);
458 
459 	(void) memset(&to, 0, sizeof(struct sockaddr));
460 	to.sin_family = AF_INET;
461 	if (inet_aton(*argv, &to.sin_addr) != 0)
462 		hostname = *argv;
463 	else {
464 		hp = gethostbyname(*argv);
465 		if (hp == 0)
466 			errx(1, "unknown host %s", *argv);
467 		to.sin_family = hp->h_addrtype;
468 		memcpy(&to.sin_addr, hp->h_addr, hp->h_length);
469 		if ((hostname = strdup(hp->h_name)) == NULL)
470 			err(1, "malloc");
471 		if (hp->h_addr_list[1] != NULL)
472 			warnx("Warning: %s has multiple addresses; using %s",
473 			    hostname, inet_ntoa(to.sin_addr));
474 	}
475 	if (*++argv) {
476 		errno = 0;
477 		ep = NULL;
478 		l = strtol(*argv, &ep, 10);
479 		if (errno || !*argv || *ep || l < 0 || l > INT_MAX)
480 			errx(1, "datalen out of range");
481 		datalen = (int)l;
482 	}
483 
484 	switch (proto) {
485 	case IPPROTO_UDP:
486 		headerlen = (sizeof(struct ip) + lsrrlen +
487 		    sizeof(struct udphdr) + sizeof(struct packetdata));
488 		break;
489 	case IPPROTO_ICMP:
490 		headerlen = (sizeof(struct ip) + lsrrlen +
491 		    sizeof(struct icmp) + sizeof(struct packetdata));
492 		break;
493 	default:
494 		headerlen = (sizeof(struct ip) + lsrrlen +
495 		    sizeof(struct packetdata));
496 	}
497 
498 	if (datalen < 0 || datalen > IP_MAXPACKET - headerlen)
499 		errx(1, "packet size must be 0 to %d.",
500 		    IP_MAXPACKET - headerlen);
501 
502 	datalen += headerlen;
503 
504 	outpacket = (u_char *)malloc(datalen);
505 	if (outpacket == 0)
506 		err(1, "malloc");
507 	(void) memset(outpacket, 0, datalen);
508 
509 	ip = (struct ip *)outpacket;
510 	if (lsrr != 0) {
511 		u_char *p = (u_char *)(ip + 1);
512 
513 		*p++ = IPOPT_NOP;
514 		*p++ = IPOPT_LSRR;
515 		*p++ = lsrrlen - 1;
516 		*p++ = IPOPT_MINOFF;
517 		gateway[lsrr] = to.sin_addr;
518 		for (i = 1; i <= lsrr; i++) {
519 			memcpy(p, &gateway[i], sizeof(struct in_addr));
520 			p += sizeof(struct in_addr);
521 		}
522 		ip->ip_dst = gateway[0];
523 	} else
524 		ip->ip_dst = to.sin_addr;
525 	ip->ip_off = htons(0);
526 	ip->ip_hl = (sizeof(struct ip) + lsrrlen) >> 2;
527 	ip->ip_p = proto;
528 	ip->ip_v = IPVERSION;
529 	ip->ip_tos = tos;
530 
531 	ident = (getpid() & 0xffff) | 0x8000;
532 	tmprnd = arc4random();
533 	sec_perturb = (tmprnd & 0x80000000) ? -(tmprnd & 0x7ff) :
534 	    (tmprnd & 0x7ff);
535 	usec_perturb = arc4random();
536 
537 	if (options & SO_DEBUG)
538 		(void) setsockopt(s, SOL_SOCKET, SO_DEBUG,
539 		    (char *)&on, sizeof(on));
540 #ifdef SO_SNDBUF
541 	if (setsockopt(sndsock, SOL_SOCKET, SO_SNDBUF, (char *)&datalen,
542 	    sizeof(datalen)) < 0)
543 		err(6, "SO_SNDBUF");
544 #endif /* SO_SNDBUF */
545 #ifdef IP_HDRINCL
546 	if (setsockopt(sndsock, IPPROTO_IP, IP_HDRINCL, (char *)&on,
547 	    sizeof(on)) < 0)
548 		err(6, "IP_HDRINCL");
549 #endif /* IP_HDRINCL */
550 	if (options & SO_DEBUG)
551 		(void) setsockopt(sndsock, SOL_SOCKET, SO_DEBUG,
552 		    (char *)&on, sizeof(on));
553 	if (options & SO_DONTROUTE)
554 		(void) setsockopt(sndsock, SOL_SOCKET, SO_DONTROUTE,
555 		    (char *)&on, sizeof(on));
556 
557 	if (source) {
558 		(void) memset(&from, 0, sizeof(struct sockaddr));
559 		from.sin_family = AF_INET;
560 		if (inet_aton(source, &from.sin_addr) == 0)
561 			errx(1, "unknown host %s", source);
562 		ip->ip_src = from.sin_addr;
563 		if (getuid() != 0 &&
564 		    (ntohl(from.sin_addr.s_addr) & 0xff000000U) == 0x7f000000U &&
565 		    (ntohl(to.sin_addr.s_addr) & 0xff000000U) != 0x7f000000U)
566 			errx(1, "source is on 127/8, destination is not");
567 
568 		if (getuid() &&
569 		    bind(sndsock, (struct sockaddr *)&from, sizeof(from)) < 0)
570 			err(1, "bind");
571 	}
572 
573 	fprintf(stderr, "traceroute to %s (%s)", hostname,
574 		inet_ntoa(to.sin_addr));
575 	if (source)
576 		fprintf(stderr, " from %s", source);
577 	fprintf(stderr, ", %u hops max, %d byte packets\n", max_ttl, datalen);
578 	(void) fflush(stderr);
579 
580 	if (first_ttl > 1)
581 		printf("Skipping %u intermediate hops\n", first_ttl - 1);
582 
583 	for (ttl = first_ttl; ttl <= max_ttl; ++ttl) {
584 		int got_there = 0, unreachable = 0, timeout = 0, loss;
585 		in_addr_t lastaddr = 0;
586 		quad_t dt;
587 
588 		printf("%2u ", ttl);
589 		for (probe = 0, loss = 0; probe < nprobes; ++probe) {
590 			int cc;
591 			struct timeval t1, t2;
592 			int code;
593 
594 			(void) gettimeofday(&t1, NULL);
595 			send_probe(++seq, ttl, incflag, &to);
596 			while ((cc = wait_for_reply(s, &from, &t1))) {
597 				(void) gettimeofday(&t2, NULL);
598 				if (t2.tv_sec - t1.tv_sec > waittime) {
599 					cc = 0;
600 					break;
601 				}
602 				i = packet_ok(packet, cc, &from, seq, incflag);
603 				/* Skip short packet */
604 				if (i == 0)
605 					continue;
606 				if (from.sin_addr.s_addr != lastaddr) {
607 					print(packet, cc, &from);
608 					lastaddr = from.sin_addr.s_addr;
609 				}
610 				dt = (quad_t)(t2.tv_sec - t1.tv_sec) * 1000000 +
611 				    (quad_t)(t2.tv_usec - t1.tv_usec);
612 				printf("  %u", (u_int)(dt / 1000));
613 				if (dt % 1000)
614 					printf(".%u", (u_int)(dt % 1000));
615 				printf(" ms");
616 				ip = (struct ip *)packet;
617 				if (ttl_flag)
618 					printf(" (%u)", ip->ip_ttl);
619 				if (i == -2) {
620 #ifndef ARCHAIC
621 					ip = (struct ip *)packet;
622 					if (ip->ip_ttl <= 1)
623 						printf(" !");
624 #endif
625 					++got_there;
626 					break;
627 				}
628 				/* time exceeded in transit */
629 				if (i == -1)
630 					break;
631 				code = i - 1;
632 				switch (code) {
633 				case ICMP_UNREACH_PORT:
634 #ifndef ARCHAIC
635 					ip = (struct ip *)packet;
636 					if (ip->ip_ttl <= 1)
637 						printf(" !");
638 #endif /* ARCHAIC */
639 					++got_there;
640 					break;
641 				case ICMP_UNREACH_NET:
642 					++unreachable;
643 					printf(" !N");
644 					break;
645 				case ICMP_UNREACH_HOST:
646 					++unreachable;
647 					printf(" !H");
648 					break;
649 				case ICMP_UNREACH_PROTOCOL:
650 					++got_there;
651 					printf(" !P");
652 					break;
653 				case ICMP_UNREACH_NEEDFRAG:
654 					++unreachable;
655 					printf(" !F");
656 					break;
657 				case ICMP_UNREACH_SRCFAIL:
658 					++unreachable;
659 					printf(" !S");
660 					break;
661 				case ICMP_UNREACH_FILTER_PROHIB:
662 					++unreachable;
663 					printf(" !X");
664 					break;
665 				case ICMP_UNREACH_NET_PROHIB: /*misuse*/
666 					++unreachable;
667 					printf(" !A");
668 					break;
669 				case ICMP_UNREACH_HOST_PROHIB:
670 					++unreachable;
671 					printf(" !C");
672 					break;
673 				case ICMP_UNREACH_NET_UNKNOWN:
674 				case ICMP_UNREACH_HOST_UNKNOWN:
675 					++unreachable;
676 					printf(" !U");
677 					break;
678 				case ICMP_UNREACH_ISOLATED:
679 					++unreachable;
680 					printf(" !I");
681 					break;
682 				case ICMP_UNREACH_TOSNET:
683 				case ICMP_UNREACH_TOSHOST:
684 					++unreachable;
685 					printf(" !T");
686 					break;
687 				default:
688 					++unreachable;
689 					printf(" !<%d>", i - 1);
690 					break;
691 				}
692 				break;
693 			}
694 			if (cc == 0) {
695 				printf(" *");
696 				timeout++;
697 				loss++;
698 			}
699 			(void) fflush(stdout);
700 		}
701 		if (sump)
702 			printf(" (%d%% loss)", (loss * 100) / nprobes);
703 		putchar('\n');
704 		if (got_there || (unreachable && (unreachable + timeout) >= nprobes))
705 			break;
706 	}
707 	exit(0);
708 }
709 
710 int
711 wait_for_reply(int sock, struct sockaddr_in *from, struct timeval *sent)
712 {
713 	socklen_t fromlen = sizeof (*from);
714 	struct timeval now, wait;
715 	int cc = 0, fdsn;
716 	fd_set *fdsp;
717 
718 	fdsn = howmany(sock+1, NFDBITS) * sizeof(fd_mask);
719 	if ((fdsp = (fd_set *)malloc(fdsn)) == NULL)
720 		err(1, "malloc");
721 	memset(fdsp, 0, fdsn);
722 	FD_SET(sock, fdsp);
723 	gettimeofday(&now, NULL);
724 	wait.tv_sec = (sent->tv_sec + waittime) - now.tv_sec;
725 	wait.tv_usec =  sent->tv_usec - now.tv_usec;
726 	if (wait.tv_usec < 0) {
727 		wait.tv_usec += 1000000;
728 		wait.tv_sec--;
729 	}
730 	if (wait.tv_sec < 0)
731 		wait.tv_sec = wait.tv_usec = 0;
732 
733 	if (select(sock+1, fdsp, (fd_set *)0, (fd_set *)0, &wait) > 0)
734 		cc = recvfrom(s, (char *)packet, sizeof(packet), 0,
735 		    (struct sockaddr *)from, &fromlen);
736 
737 	free(fdsp);
738 	return (cc);
739 }
740 
741 void
742 dump_packet(void)
743 {
744 	u_char *p;
745 	int i;
746 
747 	fprintf(stderr, "packet data:");
748 	for (p = outpacket, i = 0; i < datalen; i++) {
749 		if ((i % 24) == 0)
750 			fprintf(stderr, "\n ");
751 		fprintf(stderr, " %02x", *p++);
752 	}
753 	fprintf(stderr, "\n");
754 }
755 
756 void
757 send_probe(int seq, u_int8_t ttl, int iflag, struct sockaddr_in *to)
758 {
759 	struct ip *ip = (struct ip *)outpacket;
760 	u_char *p = (u_char *)(ip + 1);
761 	struct udphdr *up = (struct udphdr *)(p + lsrrlen);
762 	struct icmp *icmpp = (struct icmp *)(p + lsrrlen);
763 	struct packetdata *op;
764 	struct timeval tv;
765 	int i;
766 
767 	ip->ip_len = htons(datalen);
768 	ip->ip_ttl = ttl;
769 	ip->ip_id = htons(ident+seq);
770 
771 	switch (proto) {
772 	case IPPROTO_ICMP:
773 		icmpp->icmp_type = icmp_type;
774 		icmpp->icmp_code = icmp_code;
775 		icmpp->icmp_seq = htons(seq);
776 		icmpp->icmp_id = htons(ident);
777 		op = (struct packetdata *)(icmpp + 1);
778 		break;
779 	case IPPROTO_UDP:
780 		up->uh_sport = htons(ident);
781 		if (iflag)
782 			up->uh_dport = htons(port+seq);
783 		else
784 			up->uh_dport = htons(port);
785 		up->uh_ulen = htons((u_short)(datalen - sizeof(struct ip) -
786 		    lsrrlen));
787 		up->uh_sum = 0;
788 		op = (struct packetdata *)(up + 1);
789 		break;
790 	default:
791 		op = (struct packetdata *)(ip + 1);
792 		break;
793 	}
794 	op->seq = seq;
795 	op->ttl = ttl;
796 
797 	/*
798 	 * We don't want hostiles snooping the net to get any useful
799 	 * information about us. Send the timestamp in network byte order,
800 	 * and perturb the timestamp enough that they won't know our
801 	 * real clock ticker. We don't want to perturb the time by too
802 	 * much: being off by a suspiciously large amount might indicate
803 	 * OpenBSD.
804 	 *
805 	 * The timestamps in the packet are currently unused. If future
806 	 * work wants to use them they will have to subtract out the
807 	 * perturbation first.
808 	 */
809 	(void) gettimeofday(&tv, NULL);
810 	op->sec = htonl(tv.tv_sec + sec_perturb);
811 	op->usec = htonl((tv.tv_usec + usec_perturb) % 1000000);
812 
813 	if (proto == IPPROTO_ICMP && icmp_type == ICMP_ECHO) {
814 		icmpp->icmp_cksum = 0;
815 		icmpp->icmp_cksum = in_cksum((u_short *)icmpp,
816 		    datalen - sizeof(struct ip) - lsrrlen);
817 		if (icmpp->icmp_cksum == 0)
818 			icmpp->icmp_cksum = 0xffff;
819 	}
820 
821 	if (dump)
822 		dump_packet();
823 
824 	i = sendto(sndsock, outpacket, datalen, 0, (struct sockaddr *)to,
825 	    sizeof(struct sockaddr_in));
826 	if (i < 0 || i != datalen)  {
827 		if (i < 0)
828 			perror("sendto");
829 		printf("traceroute: wrote %s %d chars, ret=%d\n", hostname,
830 		    datalen, i);
831 		(void) fflush(stdout);
832 	}
833 }
834 
835 static char *ttab[] = {
836 	"Echo Reply",
837 	"ICMP 1",
838 	"ICMP 2",
839 	"Dest Unreachable",
840 	"Source Quench",
841 	"Redirect",
842 	"ICMP 6",
843 	"ICMP 7",
844 	"Echo",
845 	"Router Advert",
846 	"Router Solicit",
847 	"Time Exceeded",
848 	"Param Problem",
849 	"Timestamp",
850 	"Timestamp Reply",
851 	"Info Request",
852 	"Info Reply",
853 	"Mask Request",
854 	"Mask Reply"
855 };
856 
857 /*
858  * Convert an ICMP "type" field to a printable string.
859  */
860 char *
861 pr_type(u_int8_t t)
862 {
863 	if (t > 18)
864 		return ("OUT-OF-RANGE");
865 	return (ttab[t]);
866 }
867 
868 int
869 packet_ok(u_char *buf, int cc, struct sockaddr_in *from, int seq, int iflag)
870 {
871 	struct icmp *icp;
872 	u_char code;
873 	u_int8_t type;
874 	int hlen;
875 #ifndef ARCHAIC
876 	struct ip *ip;
877 
878 	ip = (struct ip *) buf;
879 	hlen = ip->ip_hl << 2;
880 	if (cc < hlen + ICMP_MINLEN) {
881 		if (verbose)
882 			printf("packet too short (%d bytes) from %s\n", cc,
883 			    inet_ntoa(from->sin_addr));
884 		return (0);
885 	}
886 	cc -= hlen;
887 	icp = (struct icmp *)(buf + hlen);
888 #else
889 	icp = (struct icmp *)buf;
890 #endif /* ARCHAIC */
891 	type = icp->icmp_type;
892 	code = icp->icmp_code;
893 	if ((type == ICMP_TIMXCEED && code == ICMP_TIMXCEED_INTRANS) ||
894 	    type == ICMP_UNREACH || type == ICMP_ECHOREPLY) {
895 		struct ip *hip;
896 		struct udphdr *up;
897 		struct icmp *icmpp;
898 
899 		hip = &icp->icmp_ip;
900 		hlen = hip->ip_hl << 2;
901 
902 		switch (proto) {
903 		case IPPROTO_ICMP:
904 			if (icmp_type == ICMP_ECHO &&
905 			    type == ICMP_ECHOREPLY &&
906 			    icp->icmp_id == htons(ident) &&
907 			    icp->icmp_seq == htons(seq))
908 				return (-2); /* we got there */
909 
910 			icmpp = (struct icmp *)((u_char *)hip + hlen);
911 			if (hlen + 8 <= cc && hip->ip_p == IPPROTO_ICMP &&
912 			    icmpp->icmp_id == htons(ident) &&
913 			    icmpp->icmp_seq == htons(seq))
914 				return (type == ICMP_TIMXCEED? -1 : code + 1);
915 			break;
916 
917 		case IPPROTO_UDP:
918 			up = (struct udphdr *)((u_char *)hip + hlen);
919 			if (hlen + 12 <= cc && hip->ip_p == proto &&
920 			    up->uh_sport == htons(ident) &&
921 			    ((iflag && up->uh_dport == htons(port + seq)) ||
922 			    (!iflag && up->uh_dport == htons(port))))
923 				return (type == ICMP_TIMXCEED? -1 : code + 1);
924 			break;
925 		default:
926 			/* this is some odd, user specified proto,
927 			 * how do we check it?
928 			 */
929 			if (hip->ip_p == proto)
930 				return (type == ICMP_TIMXCEED? -1 : code + 1);
931 		}
932 	}
933 #ifndef ARCHAIC
934 	if (verbose) {
935 		int i;
936 		in_addr_t *lp = (in_addr_t *)&icp->icmp_ip;
937 
938 		printf("\n%d bytes from %s", cc, inet_ntoa(from->sin_addr));
939 		printf(" to %s", inet_ntoa(ip->ip_dst));
940 		printf(": icmp type %u (%s) code %d\n", type, pr_type(type),
941 		    icp->icmp_code);
942 		for (i = 4; i < cc ; i += sizeof(in_addr_t))
943 			printf("%2d: x%8.8lx\n", i, (unsigned long)*lp++);
944 	}
945 #endif /* ARCHAIC */
946 	return (0);
947 }
948 
949 void
950 print(u_char *buf, int cc, struct sockaddr_in *from)
951 {
952 	struct ip *ip;
953 	int hlen;
954 
955 	ip = (struct ip *) buf;
956 	hlen = ip->ip_hl << 2;
957 	cc -= hlen;
958 
959 	if (nflag)
960 		printf(" %s", inet_ntoa(from->sin_addr));
961 	else
962 		printf(" %s (%s)", inetname(from->sin_addr),
963 		    inet_ntoa(from->sin_addr));
964 
965 	if (verbose)
966 		printf(" %d bytes to %s", cc, inet_ntoa (ip->ip_dst));
967 }
968 
969 
970 /*
971  * Checksum routine for Internet Protocol family headers (C Version)
972  */
973 u_short
974 in_cksum(u_short *addr, int len)
975 {
976 	u_short *w = addr, answer;
977 	int nleft = len, sum = 0;
978 
979 	/*
980 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
981 	 *  we add sequential 16 bit words to it, and at the end, fold
982 	 *  back all the carry bits from the top 16 bits into the lower
983 	 *  16 bits.
984 	 */
985 	while (nleft > 1)  {
986 		sum += *w++;
987 		nleft -= 2;
988 	}
989 
990 	/* mop up an odd byte, if necessary */
991 	if (nleft == 1)
992 		sum += *(u_char *)w;
993 
994 	/*
995 	 * add back carry outs from top 16 bits to low 16 bits
996 	 */
997 	sum = (sum >> 16) + (sum & 0xffff);	/* add hi 16 to low 16 */
998 	sum += (sum >> 16);			/* add carry */
999 	answer = ~sum;				/* truncate to 16 bits */
1000 	return (answer);
1001 }
1002 
1003 /*
1004  * Construct an Internet address representation.
1005  * If the nflag has been supplied, give
1006  * numeric value, otherwise try for symbolic name.
1007  */
1008 char *
1009 inetname(struct in_addr in)
1010 {
1011 	static char domain[MAXHOSTNAMELEN], line[MAXHOSTNAMELEN];
1012 	static int first = 1;
1013 	struct hostent *hp;
1014 	char *cp;
1015 
1016 	if (first && !nflag) {
1017 		first = 0;
1018 		if (gethostname(domain, sizeof domain) == 0 &&
1019 		    (cp = strchr(domain, '.')) != NULL) {
1020 			strlcpy(domain, cp + 1, sizeof(domain));
1021 		}
1022 	}
1023 	if (!nflag && in.s_addr != INADDR_ANY) {
1024 		hp = gethostbyaddr((char *)&in, sizeof(in), AF_INET);
1025 		if (hp != NULL) {
1026 			if ((cp = strchr(hp->h_name, '.')) != NULL &&
1027 			    strcmp(cp + 1, domain) == 0)
1028 				*cp = '\0';
1029 			strlcpy(line, hp->h_name, sizeof(line));
1030 			return (line);
1031 		}
1032 	}
1033 	return (inet_ntoa(in));
1034 }
1035 
1036 void
1037 usage(void)
1038 {
1039 	extern char *__progname;
1040 
1041 	fprintf(stderr,
1042 	    "usage: %s [-cdDIlnrSv] [-f first_ttl] [-g gateway_addr] [-m max_ttl]\n"
1043 	    "\t[-p port] [-P proto] [-q nqueries] [-s src_addr] [-t tos]\n"
1044 	    "\t[-w waittime] host [packetsize]\n", __progname);
1045 	exit(1);
1046 }
1047