1 /*	$OpenBSD: traceroute.c,v 1.61 2004/01/26 18:23:51 deraadt Exp $	*/
2 /*	$NetBSD: traceroute.c,v 1.10 1995/05/21 15:50:45 mycroft Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Van Jacobson.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $DragonFly: src/usr.sbin/traceroute/traceroute.c,v 1.8 2007/08/03 09:50:05 hasso Exp $
36  * @(#)traceroute.c	8.1 (Berkeley) 6/6/93
37  */
38 
39 /*
40  * traceroute host  - trace the route ip packets follow going to "host".
41  *
42  * Attempt to trace the route an ip packet would follow to some
43  * internet host.  We find out intermediate hops by launching probe
44  * packets with a small ttl (time to live) then listening for an
45  * icmp "time exceeded" reply from a gateway.  We start our probes
46  * with a ttl of one and increase by one until we get an icmp "port
47  * unreachable" (which means we got to "host") or hit a max (which
48  * defaults to 64 hops & can be changed with the -m flag).  Three
49  * probes (change with -q flag) are sent at each ttl setting and a
50  * line is printed showing the ttl, address of the gateway and
51  * round trip time of each probe.  If the probe answers come from
52  * different gateways, the address of each responding system will
53  * be printed.  If there is no response within a 5 sec. timeout
54  * interval (changed with the -w flag), a "*" is printed for that
55  * probe.
56  *
57  * Probe packets are UDP format.  We don't want the destination
58  * host to process them so the destination port is set to an
59  * unlikely value (if some clod on the destination is using that
60  * value, it can be changed with the -p flag).
61  *
62  * A sample use might be:
63  *
64  *     [yak 71]% traceroute nis.nsf.net.
65  *     traceroute to nis.nsf.net (35.1.1.48), 64 hops max, 56 byte packet
66  *      1  helios.ee.lbl.gov (128.3.112.1)  19 ms  19 ms  0 ms
67  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
68  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
69  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  39 ms
70  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  39 ms  39 ms  39 ms
71  *      6  128.32.197.4 (128.32.197.4)  40 ms  59 ms  59 ms
72  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  59 ms
73  *      8  129.140.70.13 (129.140.70.13)  99 ms  99 ms  80 ms
74  *      9  129.140.71.6 (129.140.71.6)  139 ms  239 ms  319 ms
75  *     10  129.140.81.7 (129.140.81.7)  220 ms  199 ms  199 ms
76  *     11  nic.merit.edu (35.1.1.48)  239 ms  239 ms  239 ms
77  *
78  * Note that lines 2 & 3 are the same.  This is due to a buggy
79  * kernel on the 2nd hop system -- lbl-csam.arpa -- that forwards
80  * packets with a zero ttl.
81  *
82  * A more interesting example is:
83  *
84  *     [yak 72]% traceroute allspice.lcs.mit.edu.
85  *     traceroute to allspice.lcs.mit.edu (18.26.0.115), 64 hops max
86  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
87  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  19 ms  19 ms
88  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  19 ms
89  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  19 ms  39 ms  39 ms
90  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  20 ms  39 ms  39 ms
91  *      6  128.32.197.4 (128.32.197.4)  59 ms  119 ms  39 ms
92  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  39 ms
93  *      8  129.140.70.13 (129.140.70.13)  80 ms  79 ms  99 ms
94  *      9  129.140.71.6 (129.140.71.6)  139 ms  139 ms  159 ms
95  *     10  129.140.81.7 (129.140.81.7)  199 ms  180 ms  300 ms
96  *     11  129.140.72.17 (129.140.72.17)  300 ms  239 ms  239 ms
97  *     12  * * *
98  *     13  128.121.54.72 (128.121.54.72)  259 ms  499 ms  279 ms
99  *     14  * * *
100  *     15  * * *
101  *     16  * * *
102  *     17  * * *
103  *     18  ALLSPICE.LCS.MIT.EDU (18.26.0.115)  339 ms  279 ms  279 ms
104  *
105  * (I start to see why I'm having so much trouble with mail to
106  * MIT.)  Note that the gateways 12, 14, 15, 16 & 17 hops away
107  * either don't send ICMP "time exceeded" messages or send them
108  * with a ttl too small to reach us.  14 - 17 are running the
109  * MIT C Gateway code that doesn't send "time exceeded"s.  God
110  * only knows what's going on with 12.
111  *
112  * The silent gateway 12 in the above may be the result of a bug in
113  * the 4.[23]BSD network code (and its derivatives):  4.x (x <= 3)
114  * sends an unreachable message using whatever ttl remains in the
115  * original datagram.  Since, for gateways, the remaining ttl is
116  * zero, the icmp "time exceeded" is guaranteed to not make it back
117  * to us.  The behavior of this bug is slightly more interesting
118  * when it appears on the destination system:
119  *
120  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
121  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  39 ms
122  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  39 ms  19 ms
123  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  19 ms
124  *      5  ccn-nerif35.Berkeley.EDU (128.32.168.35)  39 ms  39 ms  39 ms
125  *      6  csgw.Berkeley.EDU (128.32.133.254)  39 ms  59 ms  39 ms
126  *      7  * * *
127  *      8  * * *
128  *      9  * * *
129  *     10  * * *
130  *     11  * * *
131  *     12  * * *
132  *     13  rip.Berkeley.EDU (128.32.131.22)  59 ms !  39 ms !  39 ms !
133  *
134  * Notice that there are 12 "gateways" (13 is the final
135  * destination) and exactly the last half of them are "missing".
136  * What's really happening is that rip (a Sun-3 running Sun OS3.5)
137  * is using the ttl from our arriving datagram as the ttl in its
138  * icmp reply.  So, the reply will time out on the return path
139  * (with no notice sent to anyone since icmp's aren't sent for
140  * icmp's) until we probe with a ttl that's at least twice the path
141  * length.  I.e., rip is really only 7 hops away.  A reply that
142  * returns with a ttl of 1 is a clue this problem exists.
143  * Traceroute prints a "!" after the time if the ttl is <= 1.
144  * Since vendors ship a lot of obsolete (DEC's Ultrix, Sun 3.x) or
145  * non-standard (HPUX) software, expect to see this problem
146  * frequently and/or take care picking the target host of your
147  * probes.
148  *
149  * Other possible annotations after the time are !H, !N, !P (got a host,
150  * network or protocol unreachable, respectively), !S or !F (source
151  * route failed or fragmentation needed -- neither of these should
152  * ever occur and the associated gateway is busted if you see one).  If
153  * almost all the probes result in some kind of unreachable, traceroute
154  * will give up and exit.
155  *
156  * Notes
157  * -----
158  * This program must be run by root or be setuid.  (I suggest that
159  * you *don't* make it setuid -- casual use could result in a lot
160  * of unnecessary traffic on our poor, congested nets.)
161  *
162  * This program requires a kernel mod that does not appear in any
163  * system available from Berkeley:  A raw ip socket using proto
164  * IPPROTO_RAW must interpret the data sent as an ip datagram (as
165  * opposed to data to be wrapped in a ip datagram).  See the README
166  * file that came with the source to this program for a description
167  * of the mods I made to /sys/netinet/raw_ip.c.  Your mileage may
168  * vary.  But, again, ANY 4.x (x < 4) BSD KERNEL WILL HAVE TO BE
169  * MODIFIED TO RUN THIS PROGRAM.
170  *
171  * The udp port usage may appear bizarre (well, ok, it is bizarre).
172  * The problem is that an icmp message only contains 8 bytes of
173  * data from the original datagram.  8 bytes is the size of a udp
174  * header so, if we want to associate replies with the original
175  * datagram, the necessary information must be encoded into the
176  * udp header (the ip id could be used but there's no way to
177  * interlock with the kernel's assignment of ip id's and, anyway,
178  * it would have taken a lot more kernel hacking to allow this
179  * code to set the ip id).  So, to allow two or more users to
180  * use traceroute simultaneously, we use this task's pid as the
181  * source port (the high bit is set to move the port number out
182  * of the "likely" range).  To keep track of which probe is being
183  * replied to (so times and/or hop counts don't get confused by a
184  * reply that was delayed in transit), we increment the destination
185  * port number before each probe.
186  *
187  * Don't use this as a coding example.  I was trying to find a
188  * routing problem and this code sort-of popped out after 48 hours
189  * without sleep.  I was amazed it ever compiled, much less ran.
190  *
191  * I stole the idea for this program from Steve Deering.  Since
192  * the first release, I've learned that had I attended the right
193  * IETF working group meetings, I also could have stolen it from Guy
194  * Almes or Matt Mathis.  I don't know (or care) who came up with
195  * the idea first.  I envy the originators' perspicacity and I'm
196  * glad they didn't keep the idea a secret.
197  *
198  * Tim Seaver, Ken Adelman and C. Philip Wood provided bug fixes and/or
199  * enhancements to the original distribution.
200  *
201  * I've hacked up a round-trip-route version of this that works by
202  * sending a loose-source-routed udp datagram through the destination
203  * back to yourself.  Unfortunately, SO many gateways botch source
204  * routing, the thing is almost worthless.  Maybe one day...
205  *
206  *  -- Van Jacobson (van@helios.ee.lbl.gov)
207  *     Tue Dec 20 03:50:13 PST 1988
208  */
209 
210 #include <sys/param.h>
211 #include <sys/time.h>
212 #include <sys/socket.h>
213 #include <sys/file.h>
214 #include <sys/ioctl.h>
215 #include <sys/sysctl.h>
216 
217 #include <netinet/in_systm.h>
218 #include <netinet/in.h>
219 #include <netinet/ip.h>
220 #include <netinet/ip_icmp.h>
221 #include <netinet/ip_var.h>
222 #include <netinet/udp.h>
223 
224 #include <arpa/inet.h>
225 
226 #include <ctype.h>
227 #include <err.h>
228 #include <errno.h>
229 #include <netdb.h>
230 #include <stdio.h>
231 #include <stdlib.h>
232 #include <string.h>
233 #include <unistd.h>
234 
235 #define	MAX_LSRR	((MAX_IPOPTLEN - 4) / 4)
236 
237 /*
238  * Format of the data in a (udp) probe packet.
239  */
240 struct packetdata {
241 	u_char seq;		/* sequence number of this packet */
242 	u_int8_t ttl;		/* ttl packet left with */
243 	u_int32_t sec;		/* time packet left */
244 	u_int32_t usec;
245 };
246 
247 /*
248  * Support for ICMP extensions - RFC4950.
249  */
250 #define ICMP_EXT_OFFSET  8 + 128 /* ICMP type, code, checksum (unused)
251 				  * + original datagram */
252 #define ICMP_EXT_VERSION 2
253 
254 /* ICMP Extension Header according to RFC4884. */
255 #define EXT_VERSION(x)	(((x) & 0xf0000000) >> 28)
256 #define EXT_CHECKSUM(x)	((x) & 0x0000ffff)
257 
258 /*
259  * ICMP extensions, object header
260  */
261 struct icmp_ext_obj_hdr {
262 	u_short length;
263 	u_char  class_num;
264 #define MPLS_STACK_ENTRY_CLASS 1
265 	u_char  c_type;
266 #define MPLS_STACK_ENTRY_C_TYPE 1
267 };
268 
269 /* MPLS Label Stack Object. */
270 #define MPLS_LABEL(x)   (((x) & 0xfffff000) >> 12)
271 #define MPLS_EXP(x)     (((x) & 0x00000e00) >> 9)
272 #define MPLS_STACK(x)   (((x) & 0x00000100) >> 8)
273 #define MPLS_TTL(x)     ((x) & 0x000000ff)
274 
275 struct in_addr gateway[MAX_LSRR + 1];
276 int lsrrlen = 0;
277 int32_t sec_perturb;
278 int32_t usec_perturb;
279 
280 u_char packet[512], *outpacket;	/* last inbound (icmp) packet */
281 
282 void decode_extensions(unsigned char *, int);
283 void dump_packet(void);
284 int wait_for_reply(int, struct sockaddr_in *, struct timeval *);
285 void send_probe(int, u_int8_t, int, struct sockaddr_in *);
286 int packet_ok(u_char *, int, struct sockaddr_in *, int, int);
287 const char *pr_type(u_int8_t);
288 void print(u_char *, int, struct sockaddr_in *);
289 char *inetname(struct in_addr);
290 u_short in_cksum(u_short *, int);
291 void usage(void);
292 
293 int s;				/* receive (icmp) socket file descriptor */
294 int sndsock;			/* send (udp) socket file descriptor */
295 
296 int datalen;			/* How much data */
297 int headerlen;			/* How long packet's header is */
298 
299 char *source = 0;
300 char *hostname;
301 
302 int nprobes = 3;
303 u_int8_t max_ttl = IPDEFTTL;
304 u_int8_t first_ttl = 1;
305 u_short ident;
306 u_short port = 32768+666;	/* start udp dest port # for probe packets */
307 u_char	proto = IPPROTO_UDP;
308 u_int8_t  icmp_type = ICMP_ECHO; /* default ICMP code/type */
309 u_char  icmp_code = 0;
310 int options;			/* socket options */
311 int verbose;
312 int waittime = 5;		/* time to wait for response (in seconds) */
313 int nflag;			/* print addresses numerically */
314 int dump;
315 int Mflag;			/* show MPLS labels if any */
316 
317 int
318 main(int argc, char *argv[])
319 {
320 	int mib[4] = { CTL_NET, PF_INET, IPPROTO_IP, IPCTL_DEFTTL };
321 	int ttl_flag = 0, incflag = 1, protoset = 0, sump = 0;
322 	int ch, i, lsrr = 0, on = 1, probe, seq = 0, tos = 0;
323 	size_t size = sizeof(max_ttl);
324 	struct sockaddr_in from, to;
325 	struct hostent *hp;
326 	u_int32_t tmprnd;
327 	struct ip *ip;
328 	u_int8_t ttl;
329 	char *ep;
330 	long l;
331 
332 	if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0)
333 		err(5, "icmp socket");
334 	if ((sndsock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0)
335 		err(5, "raw socket");
336 
337 	/* revoke privs */
338 	seteuid(getuid());
339 	setuid(getuid());
340 
341 	sysctl(mib, sizeof(mib)/sizeof(mib[0]), &max_ttl, &size, NULL, 0);
342 
343 	while ((ch = getopt(argc, argv, "SDIdg:f:m:np:q:rs:t:w:vlP:cM")) != -1)
344 		switch (ch) {
345 		case 'S':
346 			sump = 1;
347 			break;
348 		case 'f':
349 			errno = 0;
350 			ep = NULL;
351 			l = strtol(optarg, &ep, 10);
352 			if (errno || !*optarg || *ep || l < 1 || l > max_ttl)
353 				errx(1, "min ttl must be 1 to %u.", max_ttl);
354 			first_ttl = (u_int8_t)l;
355 			break;
356 		case 'c':
357 			incflag = 0;
358 			break;
359 		case 'd':
360 			options |= SO_DEBUG;
361 			break;
362 		case 'D':
363 			dump = 1;
364 			break;
365 		case 'g':
366 			if (lsrr >= MAX_LSRR)
367 				errx(1, "too many gateways; max %d", MAX_LSRR);
368 			if (inet_aton(optarg, &gateway[lsrr]) == 0) {
369 				hp = gethostbyname(optarg);
370 				if (hp == 0)
371 					errx(1, "unknown host %s", optarg);
372 				memcpy(&gateway[lsrr], hp->h_addr, hp->h_length);
373 			}
374 			if (++lsrr == 1)
375 				lsrrlen = 4;
376 			lsrrlen += 4;
377 			break;
378 		case 'I':
379 			if (protoset)
380 				errx(1, "protocol already set with -P");
381 			protoset = 1;
382 			proto = IPPROTO_ICMP;
383 			break;
384 		case 'l':
385 			ttl_flag++;
386 			break;
387 		case 'm':
388 			errno = 0;
389 			ep = NULL;
390 			l = strtol(optarg, &ep, 10);
391 			if (errno || !*optarg || *ep || l < first_ttl ||
392 			    l > MAXTTL)
393 				errx(1, "max ttl must be %u to %u.", first_ttl,
394 				    MAXTTL);
395 			max_ttl = (u_int8_t)l;
396 			break;
397 		case 'M':
398 			Mflag = 1;
399 			break;
400 		case 'n':
401 			nflag++;
402 			break;
403 		case 'p':
404 			errno = 0;
405 			ep = NULL;
406 			l = strtol(optarg, &ep, 10);
407 			if (errno || !*optarg || *ep || l <= 0 || l >= 65536)
408 				errx(1, "port must be >0, <65536.");
409 			port = (int)l;
410 			break;
411 		case 'P':
412 			if (protoset)
413 				errx(1, "protocol already set with -I");
414 			protoset = 1;
415 			errno = 0;
416 			ep = NULL;
417 			l = strtol(optarg, &ep, 10);
418 			if (errno || !*optarg || *ep || l < 1 ||
419 			    l >= IPPROTO_MAX) {
420 				struct protoent *pent;
421 
422 				pent = getprotobyname(optarg);
423 				if (pent)
424 					proto = pent->p_proto;
425 				else
426 					errx(1, "proto must be >=1, or a name.");
427 			} else
428 				proto = (int)l;
429 			break;
430 		case 'q':
431 			errno = 0;
432 			ep = NULL;
433 			l = strtol(optarg, &ep, 10);
434 			if (errno || !*optarg || *ep || l < 1 || l > INT_MAX)
435 				errx(1, "nprobes must be >0.");
436 			nprobes = (int)l;
437 			break;
438 		case 'r':
439 			options |= SO_DONTROUTE;
440 			break;
441 		case 's':
442 			/*
443 			 * set the ip source address of the outbound
444 			 * probe (e.g., on a multi-homed host).
445 			 */
446 			source = optarg;
447 			break;
448 		case 't':
449 			errno = 0;
450 			ep = NULL;
451 			l = strtol(optarg, &ep, 10);
452 			if (errno || !*optarg || *ep || l < 0 || l > 255)
453 				errx(1, "tos must be 0 to 255.");
454 			tos = (int)l;
455 			break;
456 		case 'v':
457 			verbose++;
458 			break;
459 		case 'w':
460 			errno = 0;
461 			ep = NULL;
462 			l = strtol(optarg, &ep, 10);
463 			if (errno || !*optarg || *ep || l <= 1 || l > INT_MAX)
464 				errx(1, "wait must be >1 sec.");
465 			waittime = (int)l;
466 			break;
467 		default:
468 			usage();
469 		}
470 	argc -= optind;
471 	argv += optind;
472 
473 	if (argc < 1)
474 		usage();
475 
476 	setlinebuf (stdout);
477 
478 	memset(&to, 0, sizeof(struct sockaddr));
479 	to.sin_family = AF_INET;
480 	if (inet_aton(*argv, &to.sin_addr) != 0)
481 		hostname = *argv;
482 	else {
483 		hp = gethostbyname(*argv);
484 		if (hp == 0)
485 			errx(1, "unknown host %s", *argv);
486 		to.sin_family = hp->h_addrtype;
487 		memcpy(&to.sin_addr, hp->h_addr, hp->h_length);
488 		if ((hostname = strdup(hp->h_name)) == NULL)
489 			err(1, "malloc");
490 		if (hp->h_addr_list[1] != NULL)
491 			warnx("Warning: %s has multiple addresses; using %s",
492 			    hostname, inet_ntoa(to.sin_addr));
493 	}
494 	if (*++argv) {
495 		errno = 0;
496 		ep = NULL;
497 		l = strtol(*argv, &ep, 10);
498 		if (errno || !*argv || *ep || l < 0 || l > INT_MAX)
499 			errx(1, "datalen out of range");
500 		datalen = (int)l;
501 	}
502 
503 	switch (proto) {
504 	case IPPROTO_UDP:
505 		headerlen = (sizeof(struct ip) + lsrrlen +
506 		    sizeof(struct udphdr) + sizeof(struct packetdata));
507 		break;
508 	case IPPROTO_ICMP:
509 		headerlen = (sizeof(struct ip) + lsrrlen +
510 		    sizeof(struct icmp) + sizeof(struct packetdata));
511 		break;
512 	default:
513 		headerlen = (sizeof(struct ip) + lsrrlen +
514 		    sizeof(struct packetdata));
515 	}
516 
517 	if (datalen < 0 || datalen > IP_MAXPACKET - headerlen)
518 		errx(1, "packet size must be 0 to %d.",
519 		    IP_MAXPACKET - headerlen);
520 
521 	datalen += headerlen;
522 
523 	outpacket = (u_char *)malloc(datalen);
524 	if (outpacket == NULL)
525 		err(1, "malloc");
526 	memset(outpacket, 0, datalen);
527 
528 	ip = (struct ip *)outpacket;
529 	if (lsrr != 0) {
530 		u_char *p = (u_char *)(ip + 1);
531 
532 		*p++ = IPOPT_NOP;
533 		*p++ = IPOPT_LSRR;
534 		*p++ = lsrrlen - 1;
535 		*p++ = IPOPT_MINOFF;
536 		gateway[lsrr] = to.sin_addr;
537 		for (i = 1; i <= lsrr; i++) {
538 			memcpy(p, &gateway[i], sizeof(struct in_addr));
539 			p += sizeof(struct in_addr);
540 		}
541 		ip->ip_dst = gateway[0];
542 	} else
543 		ip->ip_dst = to.sin_addr;
544 	ip->ip_off = htons(0);
545 	ip->ip_hl = (sizeof(struct ip) + lsrrlen) >> 2;
546 	ip->ip_p = proto;
547 	ip->ip_v = IPVERSION;
548 	ip->ip_tos = tos;
549 
550 	ident = (getpid() & 0xffff) | 0x8000;
551 	tmprnd = arc4random();
552 	sec_perturb = (tmprnd & 0x80000000) ? -(tmprnd & 0x7ff) :
553 	    (tmprnd & 0x7ff);
554 	usec_perturb = arc4random();
555 
556 	if (options & SO_DEBUG)
557 		setsockopt(s, SOL_SOCKET, SO_DEBUG, (char *)&on, sizeof(on));
558 #ifdef SO_SNDBUF
559 	if (setsockopt(sndsock, SOL_SOCKET, SO_SNDBUF, (char *)&datalen,
560 	    sizeof(datalen)) < 0)
561 		err(6, "SO_SNDBUF");
562 #endif /* SO_SNDBUF */
563 #ifdef IP_HDRINCL
564 	if (setsockopt(sndsock, IPPROTO_IP, IP_HDRINCL, (char *)&on,
565 	    sizeof(on)) < 0)
566 		err(6, "IP_HDRINCL");
567 #endif /* IP_HDRINCL */
568 	if (options & SO_DEBUG)
569 		setsockopt(sndsock, SOL_SOCKET, SO_DEBUG,
570 		    (char *)&on, sizeof(on));
571 	if (options & SO_DONTROUTE)
572 		setsockopt(sndsock, SOL_SOCKET, SO_DONTROUTE,
573 		    (char *)&on, sizeof(on));
574 
575 	if (source) {
576 		memset(&from, 0, sizeof(struct sockaddr));
577 		from.sin_family = AF_INET;
578 		if (inet_aton(source, &from.sin_addr) == 0)
579 			errx(1, "unknown host %s", source);
580 		ip->ip_src = from.sin_addr;
581 		if (getuid() != 0 &&
582 		    (ntohl(from.sin_addr.s_addr) & 0xff000000U) == 0x7f000000U &&
583 		    (ntohl(to.sin_addr.s_addr) & 0xff000000U) != 0x7f000000U)
584 			errx(1, "source is on 127/8, destination is not");
585 
586 		if (getuid() &&
587 		    bind(sndsock, (struct sockaddr *)&from, sizeof(from)) < 0)
588 			err(1, "bind");
589 	}
590 
591 	fprintf(stderr, "traceroute to %s (%s)", hostname,
592 		inet_ntoa(to.sin_addr));
593 	if (source)
594 		fprintf(stderr, " from %s", source);
595 	fprintf(stderr, ", %u hops max, %d byte packets\n", max_ttl, datalen);
596 	fflush(stderr);
597 
598 	if (first_ttl > 1)
599 		printf("Skipping %u intermediate hops\n", first_ttl - 1);
600 
601 	for (ttl = first_ttl; ttl <= max_ttl; ++ttl) {
602 		int got_there = 0, unreachable = 0, timeout = 0, loss;
603 		in_addr_t lastaddr = 0;
604 		quad_t dt;
605 
606 		printf("%2u ", ttl);
607 		for (probe = 0, loss = 0; probe < nprobes; ++probe) {
608 			int cc;
609 			struct timeval t1, t2;
610 			int code;
611 
612 			gettimeofday(&t1, NULL);
613 			send_probe(++seq, ttl, incflag, &to);
614 			while ((cc = wait_for_reply(s, &from, &t1))) {
615 				gettimeofday(&t2, NULL);
616 				if (t2.tv_sec - t1.tv_sec > waittime) {
617 					cc = 0;
618 					break;
619 				}
620 				i = packet_ok(packet, cc, &from, seq, incflag);
621 				/* Skip short packet */
622 				if (i == 0)
623 					continue;
624 				if (from.sin_addr.s_addr != lastaddr) {
625 					print(packet, cc, &from);
626 					lastaddr = from.sin_addr.s_addr;
627 				}
628 				dt = (quad_t)(t2.tv_sec - t1.tv_sec) * 1000000 +
629 				    (quad_t)(t2.tv_usec - t1.tv_usec);
630 				printf("  %u", (u_int)(dt / 1000));
631 				if (dt % 1000)
632 					printf(".%u", (u_int)(dt % 1000));
633 				printf(" ms");
634 				ip = (struct ip *)packet;
635 				if (ttl_flag)
636 					printf(" (%u)", ip->ip_ttl);
637 				if (i == -2) {
638 #ifndef ARCHAIC
639 					ip = (struct ip *)packet;
640 					if (ip->ip_ttl <= 1)
641 						printf(" !");
642 #endif
643 					++got_there;
644 					break;
645 				}
646 				/* time exceeded in transit */
647 				if (i == -1)
648 					break;
649 				code = i - 1;
650 				switch (code) {
651 				case ICMP_UNREACH_PORT:
652 #ifndef ARCHAIC
653 					ip = (struct ip *)packet;
654 					if (ip->ip_ttl <= 1)
655 						printf(" !");
656 #endif /* ARCHAIC */
657 					++got_there;
658 					break;
659 				case ICMP_UNREACH_NET:
660 					++unreachable;
661 					printf(" !N");
662 					break;
663 				case ICMP_UNREACH_HOST:
664 					++unreachable;
665 					printf(" !H");
666 					break;
667 				case ICMP_UNREACH_PROTOCOL:
668 					++got_there;
669 					printf(" !P");
670 					break;
671 				case ICMP_UNREACH_NEEDFRAG:
672 					++unreachable;
673 					printf(" !F");
674 					break;
675 				case ICMP_UNREACH_SRCFAIL:
676 					++unreachable;
677 					printf(" !S");
678 					break;
679 				case ICMP_UNREACH_FILTER_PROHIB:
680 					++unreachable;
681 					printf(" !X");
682 					break;
683 				case ICMP_UNREACH_NET_PROHIB: /*misuse*/
684 					++unreachable;
685 					printf(" !A");
686 					break;
687 				case ICMP_UNREACH_HOST_PROHIB:
688 					++unreachable;
689 					printf(" !C");
690 					break;
691 				case ICMP_UNREACH_NET_UNKNOWN:
692 				case ICMP_UNREACH_HOST_UNKNOWN:
693 					++unreachable;
694 					printf(" !U");
695 					break;
696 				case ICMP_UNREACH_ISOLATED:
697 					++unreachable;
698 					printf(" !I");
699 					break;
700 				case ICMP_UNREACH_TOSNET:
701 				case ICMP_UNREACH_TOSHOST:
702 					++unreachable;
703 					printf(" !T");
704 					break;
705 				default:
706 					++unreachable;
707 					printf(" !<%d>", i - 1);
708 					break;
709 				}
710 				break;
711 			}
712 			if (cc == 0) {
713 				printf(" *");
714 				timeout++;
715 				loss++;
716 			}
717 			else if (cc && probe == nprobes - 1 && Mflag)
718 				decode_extensions(packet, cc);
719 			fflush(stdout);
720 		}
721 		if (sump)
722 			printf(" (%d%% loss)", (loss * 100) / nprobes);
723 		putchar('\n');
724 		if (got_there || (unreachable && (unreachable + timeout) >= nprobes))
725 			break;
726 	}
727 	exit(0);
728 }
729 
730 int
731 wait_for_reply(int sock, struct sockaddr_in *from, struct timeval *sent)
732 {
733 	socklen_t fromlen = sizeof (*from);
734 	struct timeval now, wait;
735 	int cc = 0, fdsn;
736 	fd_set *fdsp;
737 
738 	fdsn = howmany(sock+1, NFDBITS) * sizeof(fd_mask);
739 	if ((fdsp = (fd_set *)malloc(fdsn)) == NULL)
740 		err(1, "malloc");
741 	memset(fdsp, 0, fdsn);
742 	FD_SET(sock, fdsp);
743 	gettimeofday(&now, NULL);
744 	wait.tv_sec = (sent->tv_sec + waittime) - now.tv_sec;
745 	wait.tv_usec =  sent->tv_usec - now.tv_usec;
746 	if (wait.tv_usec < 0) {
747 		wait.tv_usec += 1000000;
748 		wait.tv_sec--;
749 	}
750 	if (wait.tv_sec < 0)
751 		wait.tv_sec = wait.tv_usec = 0;
752 
753 	if (select(sock+1, fdsp, (fd_set *)0, (fd_set *)0, &wait) > 0)
754 		cc = recvfrom(s, (char *)packet, sizeof(packet), 0,
755 		    (struct sockaddr *)from, &fromlen);
756 
757 	free(fdsp);
758 	return (cc);
759 }
760 
761 void
762 decode_extensions(unsigned char *buf, int ip_len)
763 {
764 	 uint32_t *cmn_hdr;
765 	 struct icmp_ext_obj_hdr *obj_hdr;
766 	 uint32_t mpls_hdr;
767 	 int data_len, obj_len;
768 	 struct ip *ip;
769 
770 	 ip = (struct ip *)buf;
771 
772 	 if (ip_len <= (int)(sizeof(struct ip) + ICMP_EXT_OFFSET)) {
773 		/*
774 		 * No support for ICMP extensions on this host
775 		 */
776 		return;
777 	 }
778 
779 	 /*
780 	  * Move forward to the start of the ICMP extensions, if present
781 	  */
782 	 buf += (ip->ip_hl << 2) + ICMP_EXT_OFFSET;
783 	 cmn_hdr = (uint32_t *)buf;
784 
785 	 if (EXT_VERSION(ntohl(*cmn_hdr)) != ICMP_EXT_VERSION) {
786 		/*
787 		 * Unknown version
788 		 */
789 		return;
790 	 }
791 
792 	 data_len = ip_len - ((u_char *)cmn_hdr - (u_char *)ip);
793 
794 	 /*
795 	  * Check the checksum, cmn_hdr->checksum == 0 means no checksum'ing
796 	  * done by sender.
797 	  *
798 	  * If the checksum is ok, we'll get 0, as the checksum is calculated
799 	  * with the checksum field being 0'd.
800 	  */
801 	 if (EXT_CHECKSUM(ntohl(*cmn_hdr)) &&
802 	     in_cksum((u_short *)cmn_hdr, data_len)) {
803 		return;
804 	 }
805 
806 	 buf += sizeof(*cmn_hdr);
807 	 data_len -= sizeof(*cmn_hdr);
808 
809 	 while (data_len >= (int)sizeof(struct icmp_ext_obj_hdr)) {
810 		unsigned char *nextbuf;
811 
812 		obj_hdr = (struct icmp_ext_obj_hdr *)buf;
813 		obj_len = ntohs(obj_hdr->length);
814 
815 		/*
816 		 * Sanity check the length field
817 		 */
818 		if (obj_len < (int)sizeof(*obj_hdr) || obj_len > data_len)
819 			return;
820 
821 		/* Object has to be 4-byte aligned. */
822 		if (obj_len & 3)
823 			return;
824 
825 		nextbuf = buf + obj_len;
826 		data_len -= obj_len;
827 
828 		/*
829 		 * Move past the object header
830 		 */
831 		buf += sizeof(struct icmp_ext_obj_hdr);
832 		obj_len -= sizeof(struct icmp_ext_obj_hdr);
833 
834 		switch (obj_hdr->class_num) {
835 		case MPLS_STACK_ENTRY_CLASS:
836 			switch (obj_hdr->c_type) {
837 			case MPLS_STACK_ENTRY_C_TYPE:
838 				while (obj_len >= (int)sizeof(uint32_t)) {
839 					mpls_hdr = ntohl(*(uint32_t *)buf);
840 
841 					buf += sizeof(uint32_t);
842 					obj_len -= sizeof(uint32_t);
843 					printf(" [MPLS: Label %d Exp %d]",
844 					    MPLS_LABEL(mpls_hdr),
845 					    MPLS_EXP(mpls_hdr));
846 				}
847 				if (obj_len > 0) {
848 					/*
849 					 * Something went wrong, and we're at
850 					 * a unknown offset into the packet,
851 					 * ditch the rest of it.
852 					 */
853 					return;
854 				}
855 				break;
856 			default:
857 				/*
858 				 * Unknown object, skip past it
859 				 */
860 				buf = nextbuf;
861 				break;
862 			}
863 			break;
864 
865 		default:
866 			/*
867 			 * Unknown object, skip past it
868 			 */
869 			buf = nextbuf;
870 			break;
871 		}
872 	}
873 }
874 
875 void
876 dump_packet(void)
877 {
878 	u_char *p;
879 	int i;
880 
881 	fprintf(stderr, "packet data:");
882 	for (p = outpacket, i = 0; i < datalen; i++) {
883 		if ((i % 24) == 0)
884 			fprintf(stderr, "\n ");
885 		fprintf(stderr, " %02x", *p++);
886 	}
887 	fprintf(stderr, "\n");
888 }
889 
890 void
891 send_probe(int seq, u_int8_t ttl, int iflag, struct sockaddr_in *to)
892 {
893 	struct ip *ip = (struct ip *)outpacket;
894 	u_char *p = (u_char *)(ip + 1);
895 	struct udphdr *up = (struct udphdr *)(p + lsrrlen);
896 	struct icmp *icmpp = (struct icmp *)(p + lsrrlen);
897 	struct packetdata *op;
898 	struct timeval tv;
899 	int i;
900 
901 	ip->ip_len = datalen;
902 	ip->ip_ttl = ttl;
903 	ip->ip_id = htons(ident+seq);
904 
905 	switch (proto) {
906 	case IPPROTO_ICMP:
907 		icmpp->icmp_type = icmp_type;
908 		icmpp->icmp_code = icmp_code;
909 		icmpp->icmp_seq = htons(seq);
910 		icmpp->icmp_id = htons(ident);
911 		op = (struct packetdata *)(icmpp + 1);
912 		break;
913 	case IPPROTO_UDP:
914 		up->uh_sport = htons(ident);
915 		if (iflag)
916 			up->uh_dport = htons(port+seq);
917 		else
918 			up->uh_dport = htons(port);
919 		up->uh_ulen = htons((u_short)(datalen - sizeof(struct ip) -
920 		    lsrrlen));
921 		up->uh_sum = 0;
922 		op = (struct packetdata *)(up + 1);
923 		break;
924 	default:
925 		op = (struct packetdata *)(ip + 1);
926 		break;
927 	}
928 	op->seq = seq;
929 	op->ttl = ttl;
930 
931 	/*
932 	 * We don't want hostiles snooping the net to get any useful
933 	 * information about us. Send the timestamp in network byte order,
934 	 * and perturb the timestamp enough that they won't know our
935 	 * real clock ticker. We don't want to perturb the time by too
936 	 * much: being off by a suspiciously large amount might indicate
937 	 * OpenBSD.
938 	 *
939 	 * The timestamps in the packet are currently unused. If future
940 	 * work wants to use them they will have to subtract out the
941 	 * perturbation first.
942 	 */
943 	gettimeofday(&tv, NULL);
944 	op->sec = htonl(tv.tv_sec + sec_perturb);
945 	op->usec = htonl((tv.tv_usec + usec_perturb) % 1000000);
946 
947 	if (proto == IPPROTO_ICMP && icmp_type == ICMP_ECHO) {
948 		icmpp->icmp_cksum = 0;
949 		icmpp->icmp_cksum = in_cksum((u_short *)icmpp,
950 		    datalen - sizeof(struct ip) - lsrrlen);
951 		if (icmpp->icmp_cksum == 0)
952 			icmpp->icmp_cksum = 0xffff;
953 	}
954 
955 	if (dump)
956 		dump_packet();
957 
958 	i = sendto(sndsock, outpacket, datalen, 0, (struct sockaddr *)to,
959 	    sizeof(struct sockaddr_in));
960 	if (i < 0 || i != datalen)  {
961 		if (i < 0)
962 			perror("sendto");
963 		printf("traceroute: wrote %s %d chars, ret=%d\n", hostname,
964 		    datalen, i);
965 		fflush(stdout);
966 	}
967 }
968 
969 static const char *ttab[] = {
970 	"Echo Reply",
971 	"ICMP 1",
972 	"ICMP 2",
973 	"Dest Unreachable",
974 	"Source Quench",
975 	"Redirect",
976 	"ICMP 6",
977 	"ICMP 7",
978 	"Echo",
979 	"Router Advert",
980 	"Router Solicit",
981 	"Time Exceeded",
982 	"Param Problem",
983 	"Timestamp",
984 	"Timestamp Reply",
985 	"Info Request",
986 	"Info Reply",
987 	"Mask Request",
988 	"Mask Reply"
989 };
990 
991 /*
992  * Convert an ICMP "type" field to a printable string.
993  */
994 const char *
995 pr_type(u_int8_t t)
996 {
997 	if (t > 18)
998 		return ("OUT-OF-RANGE");
999 	return (ttab[t]);
1000 }
1001 
1002 int
1003 packet_ok(u_char *buf, int cc, struct sockaddr_in *from, int seq, int iflag)
1004 {
1005 	struct icmp *icp;
1006 	u_char code;
1007 	u_int8_t type;
1008 	int hlen;
1009 #ifndef ARCHAIC
1010 	struct ip *ip;
1011 
1012 	ip = (struct ip *) buf;
1013 	hlen = ip->ip_hl << 2;
1014 	if (cc < hlen + ICMP_MINLEN) {
1015 		if (verbose)
1016 			printf("packet too short (%d bytes) from %s\n", cc,
1017 			    inet_ntoa(from->sin_addr));
1018 		return (0);
1019 	}
1020 	cc -= hlen;
1021 	icp = (struct icmp *)(buf + hlen);
1022 #else
1023 	icp = (struct icmp *)buf;
1024 #endif /* ARCHAIC */
1025 	type = icp->icmp_type;
1026 	code = icp->icmp_code;
1027 	if ((type == ICMP_TIMXCEED && code == ICMP_TIMXCEED_INTRANS) ||
1028 	    type == ICMP_UNREACH || type == ICMP_ECHOREPLY) {
1029 		struct ip *hip;
1030 		struct udphdr *up;
1031 		struct icmp *icmpp;
1032 
1033 		hip = &icp->icmp_ip;
1034 		hlen = hip->ip_hl << 2;
1035 
1036 		switch (proto) {
1037 		case IPPROTO_ICMP:
1038 			if (icmp_type == ICMP_ECHO &&
1039 			    type == ICMP_ECHOREPLY &&
1040 			    icp->icmp_id == htons(ident) &&
1041 			    icp->icmp_seq == htons(seq))
1042 				return (-2); /* we got there */
1043 
1044 			icmpp = (struct icmp *)((u_char *)hip + hlen);
1045 			if (hlen + 8 <= cc && hip->ip_p == IPPROTO_ICMP &&
1046 			    icmpp->icmp_id == htons(ident) &&
1047 			    icmpp->icmp_seq == htons(seq))
1048 				return (type == ICMP_TIMXCEED? -1 : code + 1);
1049 			break;
1050 
1051 		case IPPROTO_UDP:
1052 			up = (struct udphdr *)((u_char *)hip + hlen);
1053 			if (hlen + 12 <= cc && hip->ip_p == proto &&
1054 			    up->uh_sport == htons(ident) &&
1055 			    ((iflag && up->uh_dport == htons(port + seq)) ||
1056 			    (!iflag && up->uh_dport == htons(port))))
1057 				return (type == ICMP_TIMXCEED? -1 : code + 1);
1058 			break;
1059 		default:
1060 			/* this is some odd, user specified proto,
1061 			 * how do we check it?
1062 			 */
1063 			if (hip->ip_p == proto)
1064 				return (type == ICMP_TIMXCEED? -1 : code + 1);
1065 		}
1066 	}
1067 #ifndef ARCHAIC
1068 	if (verbose) {
1069 		int i;
1070 		in_addr_t *lp = (in_addr_t *)&icp->icmp_ip;
1071 
1072 		printf("\n%d bytes from %s", cc, inet_ntoa(from->sin_addr));
1073 		printf(" to %s", inet_ntoa(ip->ip_dst));
1074 		printf(": icmp type %u (%s) code %d\n", type, pr_type(type),
1075 		    icp->icmp_code);
1076 		for (i = 4; i < cc ; i += sizeof(in_addr_t))
1077 			printf("%2d: x%8.8lx\n", i, (unsigned long)*lp++);
1078 	}
1079 #endif /* ARCHAIC */
1080 	return (0);
1081 }
1082 
1083 void
1084 print(u_char *buf, int cc, struct sockaddr_in *from)
1085 {
1086 	struct ip *ip;
1087 	int hlen;
1088 
1089 	ip = (struct ip *) buf;
1090 	hlen = ip->ip_hl << 2;
1091 	cc -= hlen;
1092 
1093 	if (nflag)
1094 		printf(" %s", inet_ntoa(from->sin_addr));
1095 	else
1096 		printf(" %s (%s)", inetname(from->sin_addr),
1097 		    inet_ntoa(from->sin_addr));
1098 
1099 	if (verbose)
1100 		printf(" %d bytes to %s", cc, inet_ntoa (ip->ip_dst));
1101 }
1102 
1103 
1104 /*
1105  * Checksum routine for Internet Protocol family headers (C Version)
1106  */
1107 u_short
1108 in_cksum(u_short *addr, int len)
1109 {
1110 	u_short *w = addr, answer;
1111 	int nleft = len, sum = 0;
1112 
1113 	/*
1114 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
1115 	 *  we add sequential 16 bit words to it, and at the end, fold
1116 	 *  back all the carry bits from the top 16 bits into the lower
1117 	 *  16 bits.
1118 	 */
1119 	while (nleft > 1)  {
1120 		sum += *w++;
1121 		nleft -= 2;
1122 	}
1123 
1124 	/* mop up an odd byte, if necessary */
1125 	if (nleft == 1)
1126 		sum += *(u_char *)w;
1127 
1128 	/*
1129 	 * add back carry outs from top 16 bits to low 16 bits
1130 	 */
1131 	sum = (sum >> 16) + (sum & 0xffff);	/* add hi 16 to low 16 */
1132 	sum += (sum >> 16);			/* add carry */
1133 	answer = ~sum;				/* truncate to 16 bits */
1134 	return (answer);
1135 }
1136 
1137 /*
1138  * Construct an Internet address representation.
1139  * If the nflag has been supplied, give
1140  * numeric value, otherwise try for symbolic name.
1141  */
1142 char *
1143 inetname(struct in_addr in)
1144 {
1145 	static char domain[MAXHOSTNAMELEN], line[MAXHOSTNAMELEN];
1146 	static int first = 1;
1147 	struct hostent *hp;
1148 	char *cp;
1149 
1150 	if (first && !nflag) {
1151 		first = 0;
1152 		if (gethostname(domain, sizeof domain) == 0 &&
1153 		    (cp = strchr(domain, '.')) != NULL) {
1154 			strlcpy(domain, cp + 1, sizeof(domain));
1155 		}
1156 	}
1157 	if (!nflag && in.s_addr != INADDR_ANY) {
1158 		hp = gethostbyaddr((char *)&in, sizeof(in), AF_INET);
1159 		if (hp != NULL) {
1160 			if ((cp = strchr(hp->h_name, '.')) != NULL &&
1161 			    strcmp(cp + 1, domain) == 0)
1162 				*cp = '\0';
1163 			strlcpy(line, hp->h_name, sizeof(line));
1164 			return (line);
1165 		}
1166 	}
1167 	return (inet_ntoa(in));
1168 }
1169 
1170 void
1171 usage(void)
1172 {
1173 	fprintf(stderr,
1174 	    "usage: %s [-cdDIlMnrSv] [-f first_ttl] [-g gateway_addr] [-m max_ttl]\n"
1175 	    "\t[-p port] [-P proto] [-q nqueries] [-s src_addr] [-t tos]\n"
1176 	    "\t[-w waittime] host [packetsize]\n", getprogname());
1177 	exit(1);
1178 }
1179