1 /*	$OpenBSD: traceroute.c,v 1.61 2004/01/26 18:23:51 deraadt Exp $	*/
2 /*	$NetBSD: traceroute.c,v 1.10 1995/05/21 15:50:45 mycroft Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Van Jacobson.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $DragonFly: src/usr.sbin/traceroute/traceroute.c,v 1.9 2007/10/25 08:12:42 hasso Exp $
36  * @(#)traceroute.c	8.1 (Berkeley) 6/6/93
37  */
38 
39 /*
40  * traceroute host  - trace the route ip packets follow going to "host".
41  *
42  * Attempt to trace the route an ip packet would follow to some
43  * internet host.  We find out intermediate hops by launching probe
44  * packets with a small ttl (time to live) then listening for an
45  * icmp "time exceeded" reply from a gateway.  We start our probes
46  * with a ttl of one and increase by one until we get an icmp "port
47  * unreachable" (which means we got to "host") or hit a max (which
48  * defaults to 64 hops & can be changed with the -m flag).  Three
49  * probes (change with -q flag) are sent at each ttl setting and a
50  * line is printed showing the ttl, address of the gateway and
51  * round trip time of each probe.  If the probe answers come from
52  * different gateways, the address of each responding system will
53  * be printed.  If there is no response within a 5 sec. timeout
54  * interval (changed with the -w flag), a "*" is printed for that
55  * probe.
56  *
57  * Probe packets are UDP format.  We don't want the destination
58  * host to process them so the destination port is set to an
59  * unlikely value (if some clod on the destination is using that
60  * value, it can be changed with the -p flag).
61  *
62  * A sample use might be:
63  *
64  *     [yak 71]% traceroute nis.nsf.net.
65  *     traceroute to nis.nsf.net (35.1.1.48), 64 hops max, 56 byte packet
66  *      1  helios.ee.lbl.gov (128.3.112.1)  19 ms  19 ms  0 ms
67  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
68  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
69  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  39 ms
70  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  39 ms  39 ms  39 ms
71  *      6  128.32.197.4 (128.32.197.4)  40 ms  59 ms  59 ms
72  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  59 ms
73  *      8  129.140.70.13 (129.140.70.13)  99 ms  99 ms  80 ms
74  *      9  129.140.71.6 (129.140.71.6)  139 ms  239 ms  319 ms
75  *     10  129.140.81.7 (129.140.81.7)  220 ms  199 ms  199 ms
76  *     11  nic.merit.edu (35.1.1.48)  239 ms  239 ms  239 ms
77  *
78  * Note that lines 2 & 3 are the same.  This is due to a buggy
79  * kernel on the 2nd hop system -- lbl-csam.arpa -- that forwards
80  * packets with a zero ttl.
81  *
82  * A more interesting example is:
83  *
84  *     [yak 72]% traceroute allspice.lcs.mit.edu.
85  *     traceroute to allspice.lcs.mit.edu (18.26.0.115), 64 hops max
86  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
87  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  19 ms  19 ms
88  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  19 ms
89  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  19 ms  39 ms  39 ms
90  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  20 ms  39 ms  39 ms
91  *      6  128.32.197.4 (128.32.197.4)  59 ms  119 ms  39 ms
92  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  39 ms
93  *      8  129.140.70.13 (129.140.70.13)  80 ms  79 ms  99 ms
94  *      9  129.140.71.6 (129.140.71.6)  139 ms  139 ms  159 ms
95  *     10  129.140.81.7 (129.140.81.7)  199 ms  180 ms  300 ms
96  *     11  129.140.72.17 (129.140.72.17)  300 ms  239 ms  239 ms
97  *     12  * * *
98  *     13  128.121.54.72 (128.121.54.72)  259 ms  499 ms  279 ms
99  *     14  * * *
100  *     15  * * *
101  *     16  * * *
102  *     17  * * *
103  *     18  ALLSPICE.LCS.MIT.EDU (18.26.0.115)  339 ms  279 ms  279 ms
104  *
105  * (I start to see why I'm having so much trouble with mail to
106  * MIT.)  Note that the gateways 12, 14, 15, 16 & 17 hops away
107  * either don't send ICMP "time exceeded" messages or send them
108  * with a ttl too small to reach us.  14 - 17 are running the
109  * MIT C Gateway code that doesn't send "time exceeded"s.  God
110  * only knows what's going on with 12.
111  *
112  * The silent gateway 12 in the above may be the result of a bug in
113  * the 4.[23]BSD network code (and its derivatives):  4.x (x <= 3)
114  * sends an unreachable message using whatever ttl remains in the
115  * original datagram.  Since, for gateways, the remaining ttl is
116  * zero, the icmp "time exceeded" is guaranteed to not make it back
117  * to us.  The behavior of this bug is slightly more interesting
118  * when it appears on the destination system:
119  *
120  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
121  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  39 ms
122  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  39 ms  19 ms
123  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  19 ms
124  *      5  ccn-nerif35.Berkeley.EDU (128.32.168.35)  39 ms  39 ms  39 ms
125  *      6  csgw.Berkeley.EDU (128.32.133.254)  39 ms  59 ms  39 ms
126  *      7  * * *
127  *      8  * * *
128  *      9  * * *
129  *     10  * * *
130  *     11  * * *
131  *     12  * * *
132  *     13  rip.Berkeley.EDU (128.32.131.22)  59 ms !  39 ms !  39 ms !
133  *
134  * Notice that there are 12 "gateways" (13 is the final
135  * destination) and exactly the last half of them are "missing".
136  * What's really happening is that rip (a Sun-3 running Sun OS3.5)
137  * is using the ttl from our arriving datagram as the ttl in its
138  * icmp reply.  So, the reply will time out on the return path
139  * (with no notice sent to anyone since icmp's aren't sent for
140  * icmp's) until we probe with a ttl that's at least twice the path
141  * length.  I.e., rip is really only 7 hops away.  A reply that
142  * returns with a ttl of 1 is a clue this problem exists.
143  * Traceroute prints a "!" after the time if the ttl is <= 1.
144  * Since vendors ship a lot of obsolete (DEC's Ultrix, Sun 3.x) or
145  * non-standard (HPUX) software, expect to see this problem
146  * frequently and/or take care picking the target host of your
147  * probes.
148  *
149  * Other possible annotations after the time are !H, !N, !P (got a host,
150  * network or protocol unreachable, respectively), !S or !F (source
151  * route failed or fragmentation needed -- neither of these should
152  * ever occur and the associated gateway is busted if you see one).  If
153  * almost all the probes result in some kind of unreachable, traceroute
154  * will give up and exit.
155  *
156  * Notes
157  * -----
158  * This program must be run by root or be setuid.  (I suggest that
159  * you *don't* make it setuid -- casual use could result in a lot
160  * of unnecessary traffic on our poor, congested nets.)
161  *
162  * This program requires a kernel mod that does not appear in any
163  * system available from Berkeley:  A raw ip socket using proto
164  * IPPROTO_RAW must interpret the data sent as an ip datagram (as
165  * opposed to data to be wrapped in a ip datagram).  See the README
166  * file that came with the source to this program for a description
167  * of the mods I made to /sys/netinet/raw_ip.c.  Your mileage may
168  * vary.  But, again, ANY 4.x (x < 4) BSD KERNEL WILL HAVE TO BE
169  * MODIFIED TO RUN THIS PROGRAM.
170  *
171  * The udp port usage may appear bizarre (well, ok, it is bizarre).
172  * The problem is that an icmp message only contains 8 bytes of
173  * data from the original datagram.  8 bytes is the size of a udp
174  * header so, if we want to associate replies with the original
175  * datagram, the necessary information must be encoded into the
176  * udp header (the ip id could be used but there's no way to
177  * interlock with the kernel's assignment of ip id's and, anyway,
178  * it would have taken a lot more kernel hacking to allow this
179  * code to set the ip id).  So, to allow two or more users to
180  * use traceroute simultaneously, we use this task's pid as the
181  * source port (the high bit is set to move the port number out
182  * of the "likely" range).  To keep track of which probe is being
183  * replied to (so times and/or hop counts don't get confused by a
184  * reply that was delayed in transit), we increment the destination
185  * port number before each probe.
186  *
187  * Don't use this as a coding example.  I was trying to find a
188  * routing problem and this code sort-of popped out after 48 hours
189  * without sleep.  I was amazed it ever compiled, much less ran.
190  *
191  * I stole the idea for this program from Steve Deering.  Since
192  * the first release, I've learned that had I attended the right
193  * IETF working group meetings, I also could have stolen it from Guy
194  * Almes or Matt Mathis.  I don't know (or care) who came up with
195  * the idea first.  I envy the originators' perspicacity and I'm
196  * glad they didn't keep the idea a secret.
197  *
198  * Tim Seaver, Ken Adelman and C. Philip Wood provided bug fixes and/or
199  * enhancements to the original distribution.
200  *
201  * I've hacked up a round-trip-route version of this that works by
202  * sending a loose-source-routed udp datagram through the destination
203  * back to yourself.  Unfortunately, SO many gateways botch source
204  * routing, the thing is almost worthless.  Maybe one day...
205  *
206  *  -- Van Jacobson (van@helios.ee.lbl.gov)
207  *     Tue Dec 20 03:50:13 PST 1988
208  */
209 
210 #include <sys/param.h>
211 #include <sys/time.h>
212 #include <sys/socket.h>
213 #include <sys/file.h>
214 #include <sys/ioctl.h>
215 #include <sys/sysctl.h>
216 
217 #include <netinet/in_systm.h>
218 #include <netinet/in.h>
219 #include <netinet/ip.h>
220 #include <netinet/ip_icmp.h>
221 #include <netinet/ip_var.h>
222 #include <netinet/udp.h>
223 
224 #include <arpa/inet.h>
225 
226 #include <ctype.h>
227 #include <err.h>
228 #include <errno.h>
229 #include <netdb.h>
230 #include <stdio.h>
231 #include <stdlib.h>
232 #include <string.h>
233 #include <unistd.h>
234 
235 #define	MAX_LSRR	((MAX_IPOPTLEN - 4) / 4)
236 
237 /*
238  * Format of the data in a (udp) probe packet.
239  */
240 struct packetdata {
241 	u_char seq;		/* sequence number of this packet */
242 	u_int8_t ttl;		/* ttl packet left with */
243 	u_int32_t sec;		/* time packet left */
244 	u_int32_t usec;
245 };
246 
247 /*
248  * Support for ICMP extensions - RFC4950.
249  */
250 #define ICMP_EXT_OFFSET  8 + 128 /* ICMP type, code, checksum (unused)
251 				  * + original datagram */
252 #define ICMP_EXT_VERSION 2
253 
254 /* ICMP Extension Header according to RFC4884. */
255 #define EXT_VERSION(x)	(((x) & 0xf0000000) >> 28)
256 #define EXT_CHECKSUM(x)	((x) & 0x0000ffff)
257 
258 /*
259  * ICMP extensions, object header
260  */
261 struct icmp_ext_obj_hdr {
262 	u_short length;
263 	u_char  class_num;
264 #define MPLS_STACK_ENTRY_CLASS 1
265 	u_char  c_type;
266 #define MPLS_STACK_ENTRY_C_TYPE 1
267 };
268 
269 /* MPLS Label Stack Object. */
270 #define MPLS_LABEL(x)   (((x) & 0xfffff000) >> 12)
271 #define MPLS_EXP(x)     (((x) & 0x00000e00) >> 9)
272 #define MPLS_STACK(x)   (((x) & 0x00000100) >> 8)
273 #define MPLS_TTL(x)     ((x) & 0x000000ff)
274 
275 struct in_addr gateway[MAX_LSRR + 1];
276 int lsrrlen = 0;
277 int32_t sec_perturb;
278 int32_t usec_perturb;
279 
280 u_char packet[512], *outpacket;	/* last inbound (icmp) packet */
281 
282 void decode_extensions(unsigned char *, int);
283 void dump_packet(void);
284 int wait_for_reply(int, struct sockaddr_in *, struct timeval *);
285 void send_probe(int, u_int8_t, int, struct sockaddr_in *);
286 int packet_ok(u_char *, int, struct sockaddr_in *, int, int);
287 const char *pr_type(u_int8_t);
288 void print(u_char *, int, struct sockaddr_in *);
289 char *inetname(struct in_addr);
290 u_short in_cksum(u_short *, int);
291 void usage(void);
292 
293 int s;				/* receive (icmp) socket file descriptor */
294 int sndsock;			/* send (udp) socket file descriptor */
295 
296 int datalen;			/* How much data */
297 int headerlen;			/* How long packet's header is */
298 
299 char *source = 0;
300 char *hostname;
301 
302 int nprobes = 3;
303 u_int8_t max_ttl = IPDEFTTL;
304 u_int8_t first_ttl = 1;
305 u_short ident;
306 u_short port = 32768+666;	/* start udp dest port # for probe packets */
307 u_char	proto = IPPROTO_UDP;
308 u_int8_t  icmp_type = ICMP_ECHO; /* default ICMP code/type */
309 u_char  icmp_code = 0;
310 int options;			/* socket options */
311 int verbose;
312 int waittime = 5;		/* time to wait for response (in seconds) */
313 int nflag;			/* print addresses numerically */
314 int dump;
315 int Mflag;			/* show MPLS labels if any */
316 
317 int
318 main(int argc, char *argv[])
319 {
320 	int mib[4] = { CTL_NET, PF_INET, IPPROTO_IP, IPCTL_DEFTTL };
321 	int ttl_flag = 0, incflag = 1, protoset = 0, sump = 0;
322 	int ch, i, lsrr = 0, on = 1, probe, seq = 0, tos = 0;
323 	size_t size = sizeof(max_ttl);
324 	struct sockaddr_in from, to;
325 	struct hostent *hp;
326 	u_int32_t tmprnd;
327 	struct ip *ip;
328 	u_int8_t ttl;
329 	char *ep;
330 	long l;
331 
332 	if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0)
333 		err(5, "icmp socket");
334 	if ((sndsock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0)
335 		err(5, "raw socket");
336 
337 	/* revoke privs */
338 	seteuid(getuid());
339 	setuid(getuid());
340 
341 	sysctl(mib, sizeof(mib)/sizeof(mib[0]), &max_ttl, &size, NULL, 0);
342 
343 	while ((ch = getopt(argc, argv, "SDIdg:f:m:np:q:rs:t:w:vlP:cM")) != -1)
344 		switch (ch) {
345 		case 'S':
346 			sump = 1;
347 			break;
348 		case 'f':
349 			errno = 0;
350 			ep = NULL;
351 			l = strtol(optarg, &ep, 10);
352 			if (errno || !*optarg || *ep || l < 1 || l > max_ttl)
353 				errx(1, "min ttl must be 1 to %u.", max_ttl);
354 			first_ttl = (u_int8_t)l;
355 			break;
356 		case 'c':
357 			incflag = 0;
358 			break;
359 		case 'd':
360 			options |= SO_DEBUG;
361 			break;
362 		case 'D':
363 			dump = 1;
364 			break;
365 		case 'g':
366 			if (lsrr >= MAX_LSRR)
367 				errx(1, "too many gateways; max %d", MAX_LSRR);
368 			if (inet_aton(optarg, &gateway[lsrr]) == 0) {
369 				hp = gethostbyname(optarg);
370 				if (hp == 0)
371 					errx(1, "unknown host %s", optarg);
372 				memcpy(&gateway[lsrr], hp->h_addr, hp->h_length);
373 			}
374 			if (++lsrr == 1)
375 				lsrrlen = 4;
376 			lsrrlen += 4;
377 			break;
378 		case 'I':
379 			if (protoset)
380 				errx(1, "protocol already set with -P");
381 			protoset = 1;
382 			proto = IPPROTO_ICMP;
383 			break;
384 		case 'l':
385 			ttl_flag++;
386 			break;
387 		case 'm':
388 			errno = 0;
389 			ep = NULL;
390 			l = strtol(optarg, &ep, 10);
391 			if (errno || !*optarg || *ep || l < first_ttl ||
392 			    l > MAXTTL)
393 				errx(1, "max ttl must be %u to %u.", first_ttl,
394 				    MAXTTL);
395 			max_ttl = (u_int8_t)l;
396 			break;
397 		case 'M':
398 			Mflag = 1;
399 			break;
400 		case 'n':
401 			nflag++;
402 			break;
403 		case 'p':
404 			errno = 0;
405 			ep = NULL;
406 			l = strtol(optarg, &ep, 10);
407 			if (errno || !*optarg || *ep || l <= 0 || l >= 65536)
408 				errx(1, "port must be >0, <65536.");
409 			port = (int)l;
410 			break;
411 		case 'P':
412 			if (protoset)
413 				errx(1, "protocol already set with -I");
414 			protoset = 1;
415 			errno = 0;
416 			ep = NULL;
417 			l = strtol(optarg, &ep, 10);
418 			if (errno || !*optarg || *ep || l < 1 ||
419 			    l >= IPPROTO_MAX) {
420 				struct protoent *pent;
421 
422 				pent = getprotobyname(optarg);
423 				if (pent)
424 					proto = pent->p_proto;
425 				else
426 					errx(1, "proto must be >=1, or a name.");
427 			} else
428 				proto = (int)l;
429 			break;
430 		case 'q':
431 			errno = 0;
432 			ep = NULL;
433 			l = strtol(optarg, &ep, 10);
434 			if (errno || !*optarg || *ep || l < 1 || l > INT_MAX)
435 				errx(1, "nprobes must be >0.");
436 			nprobes = (int)l;
437 			break;
438 		case 'r':
439 			options |= SO_DONTROUTE;
440 			break;
441 		case 's':
442 			/*
443 			 * set the ip source address of the outbound
444 			 * probe (e.g., on a multi-homed host).
445 			 */
446 			source = optarg;
447 			break;
448 		case 't':
449 			errno = 0;
450 			ep = NULL;
451 			l = strtol(optarg, &ep, 10);
452 			if (errno || !*optarg || *ep || l < 0 || l > 255)
453 				errx(1, "tos must be 0 to 255.");
454 			tos = (int)l;
455 			break;
456 		case 'v':
457 			verbose++;
458 			break;
459 		case 'w':
460 			errno = 0;
461 			ep = NULL;
462 			l = strtol(optarg, &ep, 10);
463 			if (errno || !*optarg || *ep || l <= 1 || l > INT_MAX)
464 				errx(1, "wait must be >1 sec.");
465 			waittime = (int)l;
466 			break;
467 		default:
468 			usage();
469 		}
470 	argc -= optind;
471 	argv += optind;
472 
473 	if (argc < 1)
474 		usage();
475 
476 	setlinebuf (stdout);
477 
478 	memset(&to, 0, sizeof(struct sockaddr));
479 	to.sin_family = AF_INET;
480 	if (inet_aton(*argv, &to.sin_addr) != 0)
481 		hostname = *argv;
482 	else {
483 		hp = gethostbyname(*argv);
484 		if (hp == 0)
485 			errx(1, "unknown host %s", *argv);
486 		to.sin_family = hp->h_addrtype;
487 		memcpy(&to.sin_addr, hp->h_addr, hp->h_length);
488 		if ((hostname = strdup(hp->h_name)) == NULL)
489 			err(1, "malloc");
490 		if (hp->h_addr_list[1] != NULL)
491 			warnx("Warning: %s has multiple addresses; using %s",
492 			    hostname, inet_ntoa(to.sin_addr));
493 	}
494 	if (*++argv) {
495 		errno = 0;
496 		ep = NULL;
497 		l = strtol(*argv, &ep, 10);
498 		if (errno || !*argv || *ep || l < 0 || l > INT_MAX)
499 			errx(1, "datalen out of range");
500 		datalen = (int)l;
501 	}
502 
503 	switch (proto) {
504 	case IPPROTO_UDP:
505 		headerlen = (sizeof(struct ip) + lsrrlen +
506 		    sizeof(struct udphdr) + sizeof(struct packetdata));
507 		break;
508 	case IPPROTO_ICMP:
509 		headerlen = (sizeof(struct ip) + lsrrlen +
510 		    sizeof(struct icmp) + sizeof(struct packetdata));
511 		break;
512 	default:
513 		headerlen = (sizeof(struct ip) + lsrrlen +
514 		    sizeof(struct packetdata));
515 	}
516 
517 	if (datalen < 0 || datalen > IP_MAXPACKET - headerlen)
518 		errx(1, "packet size must be 0 to %d.",
519 		    IP_MAXPACKET - headerlen);
520 
521 	datalen += headerlen;
522 
523 	outpacket = (u_char *)malloc(datalen);
524 	if (outpacket == NULL)
525 		err(1, "malloc");
526 	memset(outpacket, 0, datalen);
527 
528 	ip = (struct ip *)outpacket;
529 	if (lsrr != 0) {
530 		u_char *p = (u_char *)(ip + 1);
531 
532 		*p++ = IPOPT_NOP;
533 		*p++ = IPOPT_LSRR;
534 		*p++ = lsrrlen - 1;
535 		*p++ = IPOPT_MINOFF;
536 		gateway[lsrr] = to.sin_addr;
537 		for (i = 1; i <= lsrr; i++) {
538 			memcpy(p, &gateway[i], sizeof(struct in_addr));
539 			p += sizeof(struct in_addr);
540 		}
541 		ip->ip_dst = gateway[0];
542 	} else
543 		ip->ip_dst = to.sin_addr;
544 	ip->ip_off = htons(0);
545 	ip->ip_hl = (sizeof(struct ip) + lsrrlen) >> 2;
546 	ip->ip_p = proto;
547 	ip->ip_v = IPVERSION;
548 	ip->ip_tos = tos;
549 
550 	ident = (getpid() & 0xffff) | 0x8000;
551 	tmprnd = arc4random();
552 	sec_perturb = (tmprnd & 0x80000000) ? -(tmprnd & 0x7ff) :
553 	    (tmprnd & 0x7ff);
554 	usec_perturb = arc4random();
555 
556 	if (options & SO_DEBUG)
557 		setsockopt(s, SOL_SOCKET, SO_DEBUG, (char *)&on, sizeof(on));
558 #ifdef SO_SNDBUF
559 	if (setsockopt(sndsock, SOL_SOCKET, SO_SNDBUF, (char *)&datalen,
560 	    sizeof(datalen)) < 0)
561 		err(6, "SO_SNDBUF");
562 #endif /* SO_SNDBUF */
563 #ifdef IP_HDRINCL
564 	if (setsockopt(sndsock, IPPROTO_IP, IP_HDRINCL, (char *)&on,
565 	    sizeof(on)) < 0)
566 		err(6, "IP_HDRINCL");
567 #endif /* IP_HDRINCL */
568 	if (options & SO_DEBUG)
569 		setsockopt(sndsock, SOL_SOCKET, SO_DEBUG,
570 		    (char *)&on, sizeof(on));
571 	if (options & SO_DONTROUTE)
572 		setsockopt(sndsock, SOL_SOCKET, SO_DONTROUTE,
573 		    (char *)&on, sizeof(on));
574 
575 	if (source) {
576 		memset(&from, 0, sizeof(struct sockaddr));
577 		from.sin_family = AF_INET;
578 		if (inet_aton(source, &from.sin_addr) == 0)
579 			errx(1, "unknown host %s", source);
580 		ip->ip_src = from.sin_addr;
581 		if (getuid() != 0 &&
582 		    (ntohl(from.sin_addr.s_addr) & 0xff000000U) == 0x7f000000U &&
583 		    (ntohl(to.sin_addr.s_addr) & 0xff000000U) != 0x7f000000U)
584 			errx(1, "source is on 127/8, destination is not");
585 
586 		if (getuid() &&
587 		    bind(sndsock, (struct sockaddr *)&from, sizeof(from)) < 0)
588 			err(1, "bind");
589 	}
590 
591 	fprintf(stderr, "traceroute to %s (%s)", hostname,
592 		inet_ntoa(to.sin_addr));
593 	if (source)
594 		fprintf(stderr, " from %s", source);
595 	fprintf(stderr, ", %u hops max, %d byte packets\n", max_ttl, datalen);
596 	fflush(stderr);
597 
598 	if (first_ttl > 1)
599 		printf("Skipping %u intermediate hops\n", first_ttl - 1);
600 
601 	for (ttl = first_ttl; ttl <= max_ttl; ++ttl) {
602 		int got_there = 0, unreachable = 0, timeout = 0, loss;
603 		int gotlastaddr = 0;
604 		in_addr_t lastaddr = 0;
605 		quad_t dt;
606 
607 		printf("%2u ", ttl);
608 		for (probe = 0, loss = 0; probe < nprobes; ++probe) {
609 			int cc;
610 			struct timeval t1, t2;
611 			int code;
612 
613 			gettimeofday(&t1, NULL);
614 			send_probe(++seq, ttl, incflag, &to);
615 			while ((cc = wait_for_reply(s, &from, &t1))) {
616 				gettimeofday(&t2, NULL);
617 				if (t2.tv_sec - t1.tv_sec > waittime) {
618 					cc = 0;
619 					break;
620 				}
621 				i = packet_ok(packet, cc, &from, seq, incflag);
622 				/* Skip short packet */
623 				if (i == 0)
624 					continue;
625 				if (!gotlastaddr ||
626 				    from.sin_addr.s_addr != lastaddr) {
627 					if (gotlastaddr)
628 						printf("\n   ");
629 					print(packet, cc, &from);
630 					lastaddr = from.sin_addr.s_addr;
631 					++gotlastaddr;
632 				}
633 				dt = (quad_t)(t2.tv_sec - t1.tv_sec) * 1000000 +
634 				    (quad_t)(t2.tv_usec - t1.tv_usec);
635 				printf("  %u", (u_int)(dt / 1000));
636 				if (dt % 1000)
637 					printf(".%u", (u_int)(dt % 1000));
638 				printf(" ms");
639 				ip = (struct ip *)packet;
640 				if (ttl_flag)
641 					printf(" (%u)", ip->ip_ttl);
642 				if (i == -2) {
643 #ifndef ARCHAIC
644 					ip = (struct ip *)packet;
645 					if (ip->ip_ttl <= 1)
646 						printf(" !");
647 #endif
648 					++got_there;
649 					break;
650 				}
651 				/* time exceeded in transit */
652 				if (i == -1)
653 					break;
654 				code = i - 1;
655 				switch (code) {
656 				case ICMP_UNREACH_PORT:
657 #ifndef ARCHAIC
658 					ip = (struct ip *)packet;
659 					if (ip->ip_ttl <= 1)
660 						printf(" !");
661 #endif /* ARCHAIC */
662 					++got_there;
663 					break;
664 				case ICMP_UNREACH_NET:
665 					++unreachable;
666 					printf(" !N");
667 					break;
668 				case ICMP_UNREACH_HOST:
669 					++unreachable;
670 					printf(" !H");
671 					break;
672 				case ICMP_UNREACH_PROTOCOL:
673 					++got_there;
674 					printf(" !P");
675 					break;
676 				case ICMP_UNREACH_NEEDFRAG:
677 					++unreachable;
678 					printf(" !F");
679 					break;
680 				case ICMP_UNREACH_SRCFAIL:
681 					++unreachable;
682 					printf(" !S");
683 					break;
684 				case ICMP_UNREACH_FILTER_PROHIB:
685 					++unreachable;
686 					printf(" !X");
687 					break;
688 				case ICMP_UNREACH_NET_PROHIB: /*misuse*/
689 					++unreachable;
690 					printf(" !A");
691 					break;
692 				case ICMP_UNREACH_HOST_PROHIB:
693 					++unreachable;
694 					printf(" !C");
695 					break;
696 				case ICMP_UNREACH_NET_UNKNOWN:
697 				case ICMP_UNREACH_HOST_UNKNOWN:
698 					++unreachable;
699 					printf(" !U");
700 					break;
701 				case ICMP_UNREACH_ISOLATED:
702 					++unreachable;
703 					printf(" !I");
704 					break;
705 				case ICMP_UNREACH_TOSNET:
706 				case ICMP_UNREACH_TOSHOST:
707 					++unreachable;
708 					printf(" !T");
709 					break;
710 				default:
711 					++unreachable;
712 					printf(" !<%d>", i - 1);
713 					break;
714 				}
715 				break;
716 			}
717 			if (cc == 0) {
718 				printf(" *");
719 				timeout++;
720 				loss++;
721 			}
722 			else if (cc && probe == nprobes - 1 && Mflag)
723 				decode_extensions(packet, cc);
724 			fflush(stdout);
725 		}
726 		if (sump)
727 			printf(" (%d%% loss)", (loss * 100) / nprobes);
728 		putchar('\n');
729 		if (got_there || (unreachable && (unreachable + timeout) >= nprobes))
730 			break;
731 	}
732 	exit(0);
733 }
734 
735 int
736 wait_for_reply(int sock, struct sockaddr_in *from, struct timeval *sent)
737 {
738 	socklen_t fromlen = sizeof (*from);
739 	struct timeval now, wait;
740 	int cc = 0, fdsn;
741 	fd_set *fdsp;
742 
743 	fdsn = howmany(sock+1, NFDBITS) * sizeof(fd_mask);
744 	if ((fdsp = (fd_set *)malloc(fdsn)) == NULL)
745 		err(1, "malloc");
746 	memset(fdsp, 0, fdsn);
747 	FD_SET(sock, fdsp);
748 	gettimeofday(&now, NULL);
749 	wait.tv_sec = (sent->tv_sec + waittime) - now.tv_sec;
750 	wait.tv_usec =  sent->tv_usec - now.tv_usec;
751 	if (wait.tv_usec < 0) {
752 		wait.tv_usec += 1000000;
753 		wait.tv_sec--;
754 	}
755 	if (wait.tv_sec < 0)
756 		wait.tv_sec = wait.tv_usec = 0;
757 
758 	if (select(sock+1, fdsp, (fd_set *)0, (fd_set *)0, &wait) > 0)
759 		cc = recvfrom(s, (char *)packet, sizeof(packet), 0,
760 		    (struct sockaddr *)from, &fromlen);
761 
762 	free(fdsp);
763 	return (cc);
764 }
765 
766 void
767 decode_extensions(unsigned char *buf, int ip_len)
768 {
769 	 uint32_t *cmn_hdr;
770 	 struct icmp_ext_obj_hdr *obj_hdr;
771 	 uint32_t mpls_hdr;
772 	 int data_len, obj_len;
773 	 struct ip *ip;
774 
775 	 ip = (struct ip *)buf;
776 
777 	 if (ip_len <= (int)(sizeof(struct ip) + ICMP_EXT_OFFSET)) {
778 		/*
779 		 * No support for ICMP extensions on this host
780 		 */
781 		return;
782 	 }
783 
784 	 /*
785 	  * Move forward to the start of the ICMP extensions, if present
786 	  */
787 	 buf += (ip->ip_hl << 2) + ICMP_EXT_OFFSET;
788 	 cmn_hdr = (uint32_t *)buf;
789 
790 	 if (EXT_VERSION(ntohl(*cmn_hdr)) != ICMP_EXT_VERSION) {
791 		/*
792 		 * Unknown version
793 		 */
794 		return;
795 	 }
796 
797 	 data_len = ip_len - ((u_char *)cmn_hdr - (u_char *)ip);
798 
799 	 /*
800 	  * Check the checksum, cmn_hdr->checksum == 0 means no checksum'ing
801 	  * done by sender.
802 	  *
803 	  * If the checksum is ok, we'll get 0, as the checksum is calculated
804 	  * with the checksum field being 0'd.
805 	  */
806 	 if (EXT_CHECKSUM(ntohl(*cmn_hdr)) &&
807 	     in_cksum((u_short *)cmn_hdr, data_len)) {
808 		return;
809 	 }
810 
811 	 buf += sizeof(*cmn_hdr);
812 	 data_len -= sizeof(*cmn_hdr);
813 
814 	 while (data_len >= (int)sizeof(struct icmp_ext_obj_hdr)) {
815 		unsigned char *nextbuf;
816 
817 		obj_hdr = (struct icmp_ext_obj_hdr *)buf;
818 		obj_len = ntohs(obj_hdr->length);
819 
820 		/*
821 		 * Sanity check the length field
822 		 */
823 		if (obj_len < (int)sizeof(*obj_hdr) || obj_len > data_len)
824 			return;
825 
826 		/* Object has to be 4-byte aligned. */
827 		if (obj_len & 3)
828 			return;
829 
830 		nextbuf = buf + obj_len;
831 		data_len -= obj_len;
832 
833 		/*
834 		 * Move past the object header
835 		 */
836 		buf += sizeof(struct icmp_ext_obj_hdr);
837 		obj_len -= sizeof(struct icmp_ext_obj_hdr);
838 
839 		switch (obj_hdr->class_num) {
840 		case MPLS_STACK_ENTRY_CLASS:
841 			switch (obj_hdr->c_type) {
842 			case MPLS_STACK_ENTRY_C_TYPE:
843 				while (obj_len >= (int)sizeof(uint32_t)) {
844 					mpls_hdr = ntohl(*(uint32_t *)buf);
845 
846 					buf += sizeof(uint32_t);
847 					obj_len -= sizeof(uint32_t);
848 					printf(" [MPLS: Label %d Exp %d]",
849 					    MPLS_LABEL(mpls_hdr),
850 					    MPLS_EXP(mpls_hdr));
851 				}
852 				if (obj_len > 0) {
853 					/*
854 					 * Something went wrong, and we're at
855 					 * a unknown offset into the packet,
856 					 * ditch the rest of it.
857 					 */
858 					return;
859 				}
860 				break;
861 			default:
862 				/*
863 				 * Unknown object, skip past it
864 				 */
865 				buf = nextbuf;
866 				break;
867 			}
868 			break;
869 
870 		default:
871 			/*
872 			 * Unknown object, skip past it
873 			 */
874 			buf = nextbuf;
875 			break;
876 		}
877 	}
878 }
879 
880 void
881 dump_packet(void)
882 {
883 	u_char *p;
884 	int i;
885 
886 	fprintf(stderr, "packet data:");
887 	for (p = outpacket, i = 0; i < datalen; i++) {
888 		if ((i % 24) == 0)
889 			fprintf(stderr, "\n ");
890 		fprintf(stderr, " %02x", *p++);
891 	}
892 	fprintf(stderr, "\n");
893 }
894 
895 void
896 send_probe(int seq, u_int8_t ttl, int iflag, struct sockaddr_in *to)
897 {
898 	struct ip *ip = (struct ip *)outpacket;
899 	u_char *p = (u_char *)(ip + 1);
900 	struct udphdr *up = (struct udphdr *)(p + lsrrlen);
901 	struct icmp *icmpp = (struct icmp *)(p + lsrrlen);
902 	struct packetdata *op;
903 	struct timeval tv;
904 	int i;
905 
906 	ip->ip_len = datalen;
907 	ip->ip_ttl = ttl;
908 	ip->ip_id = htons(ident+seq);
909 
910 	switch (proto) {
911 	case IPPROTO_ICMP:
912 		icmpp->icmp_type = icmp_type;
913 		icmpp->icmp_code = icmp_code;
914 		icmpp->icmp_seq = htons(seq);
915 		icmpp->icmp_id = htons(ident);
916 		op = (struct packetdata *)(icmpp + 1);
917 		break;
918 	case IPPROTO_UDP:
919 		up->uh_sport = htons(ident);
920 		if (iflag)
921 			up->uh_dport = htons(port+seq);
922 		else
923 			up->uh_dport = htons(port);
924 		up->uh_ulen = htons((u_short)(datalen - sizeof(struct ip) -
925 		    lsrrlen));
926 		up->uh_sum = 0;
927 		op = (struct packetdata *)(up + 1);
928 		break;
929 	default:
930 		op = (struct packetdata *)(ip + 1);
931 		break;
932 	}
933 	op->seq = seq;
934 	op->ttl = ttl;
935 
936 	/*
937 	 * We don't want hostiles snooping the net to get any useful
938 	 * information about us. Send the timestamp in network byte order,
939 	 * and perturb the timestamp enough that they won't know our
940 	 * real clock ticker. We don't want to perturb the time by too
941 	 * much: being off by a suspiciously large amount might indicate
942 	 * OpenBSD.
943 	 *
944 	 * The timestamps in the packet are currently unused. If future
945 	 * work wants to use them they will have to subtract out the
946 	 * perturbation first.
947 	 */
948 	gettimeofday(&tv, NULL);
949 	op->sec = htonl(tv.tv_sec + sec_perturb);
950 	op->usec = htonl((tv.tv_usec + usec_perturb) % 1000000);
951 
952 	if (proto == IPPROTO_ICMP && icmp_type == ICMP_ECHO) {
953 		icmpp->icmp_cksum = 0;
954 		icmpp->icmp_cksum = in_cksum((u_short *)icmpp,
955 		    datalen - sizeof(struct ip) - lsrrlen);
956 		if (icmpp->icmp_cksum == 0)
957 			icmpp->icmp_cksum = 0xffff;
958 	}
959 
960 	if (dump)
961 		dump_packet();
962 
963 	i = sendto(sndsock, outpacket, datalen, 0, (struct sockaddr *)to,
964 	    sizeof(struct sockaddr_in));
965 	if (i < 0 || i != datalen)  {
966 		if (i < 0)
967 			perror("sendto");
968 		printf("traceroute: wrote %s %d chars, ret=%d\n", hostname,
969 		    datalen, i);
970 		fflush(stdout);
971 	}
972 }
973 
974 static const char *ttab[] = {
975 	"Echo Reply",
976 	"ICMP 1",
977 	"ICMP 2",
978 	"Dest Unreachable",
979 	"Source Quench",
980 	"Redirect",
981 	"ICMP 6",
982 	"ICMP 7",
983 	"Echo",
984 	"Router Advert",
985 	"Router Solicit",
986 	"Time Exceeded",
987 	"Param Problem",
988 	"Timestamp",
989 	"Timestamp Reply",
990 	"Info Request",
991 	"Info Reply",
992 	"Mask Request",
993 	"Mask Reply"
994 };
995 
996 /*
997  * Convert an ICMP "type" field to a printable string.
998  */
999 const char *
1000 pr_type(u_int8_t t)
1001 {
1002 	if (t > 18)
1003 		return ("OUT-OF-RANGE");
1004 	return (ttab[t]);
1005 }
1006 
1007 int
1008 packet_ok(u_char *buf, int cc, struct sockaddr_in *from, int seq, int iflag)
1009 {
1010 	struct icmp *icp;
1011 	u_char code;
1012 	u_int8_t type;
1013 	int hlen;
1014 #ifndef ARCHAIC
1015 	struct ip *ip;
1016 
1017 	ip = (struct ip *) buf;
1018 	hlen = ip->ip_hl << 2;
1019 	if (cc < hlen + ICMP_MINLEN) {
1020 		if (verbose)
1021 			printf("packet too short (%d bytes) from %s\n", cc,
1022 			    inet_ntoa(from->sin_addr));
1023 		return (0);
1024 	}
1025 	cc -= hlen;
1026 	icp = (struct icmp *)(buf + hlen);
1027 #else
1028 	icp = (struct icmp *)buf;
1029 #endif /* ARCHAIC */
1030 	type = icp->icmp_type;
1031 	code = icp->icmp_code;
1032 	if ((type == ICMP_TIMXCEED && code == ICMP_TIMXCEED_INTRANS) ||
1033 	    type == ICMP_UNREACH || type == ICMP_ECHOREPLY) {
1034 		struct ip *hip;
1035 		struct udphdr *up;
1036 		struct icmp *icmpp;
1037 
1038 		hip = &icp->icmp_ip;
1039 		hlen = hip->ip_hl << 2;
1040 
1041 		switch (proto) {
1042 		case IPPROTO_ICMP:
1043 			if (icmp_type == ICMP_ECHO &&
1044 			    type == ICMP_ECHOREPLY &&
1045 			    icp->icmp_id == htons(ident) &&
1046 			    icp->icmp_seq == htons(seq))
1047 				return (-2); /* we got there */
1048 
1049 			icmpp = (struct icmp *)((u_char *)hip + hlen);
1050 			if (hlen + 8 <= cc && hip->ip_p == IPPROTO_ICMP &&
1051 			    icmpp->icmp_id == htons(ident) &&
1052 			    icmpp->icmp_seq == htons(seq))
1053 				return (type == ICMP_TIMXCEED? -1 : code + 1);
1054 			break;
1055 
1056 		case IPPROTO_UDP:
1057 			up = (struct udphdr *)((u_char *)hip + hlen);
1058 			if (hlen + 12 <= cc && hip->ip_p == proto &&
1059 			    up->uh_sport == htons(ident) &&
1060 			    ((iflag && up->uh_dport == htons(port + seq)) ||
1061 			    (!iflag && up->uh_dport == htons(port))))
1062 				return (type == ICMP_TIMXCEED? -1 : code + 1);
1063 			break;
1064 		default:
1065 			/* this is some odd, user specified proto,
1066 			 * how do we check it?
1067 			 */
1068 			if (hip->ip_p == proto)
1069 				return (type == ICMP_TIMXCEED? -1 : code + 1);
1070 		}
1071 	}
1072 #ifndef ARCHAIC
1073 	if (verbose) {
1074 		int i;
1075 		in_addr_t *lp = (in_addr_t *)&icp->icmp_ip;
1076 
1077 		printf("\n%d bytes from %s", cc, inet_ntoa(from->sin_addr));
1078 		printf(" to %s", inet_ntoa(ip->ip_dst));
1079 		printf(": icmp type %u (%s) code %d\n", type, pr_type(type),
1080 		    icp->icmp_code);
1081 		for (i = 4; i < cc ; i += sizeof(in_addr_t))
1082 			printf("%2d: x%8.8lx\n", i, (unsigned long)*lp++);
1083 	}
1084 #endif /* ARCHAIC */
1085 	return (0);
1086 }
1087 
1088 void
1089 print(u_char *buf, int cc, struct sockaddr_in *from)
1090 {
1091 	struct ip *ip;
1092 	int hlen;
1093 
1094 	ip = (struct ip *) buf;
1095 	hlen = ip->ip_hl << 2;
1096 	cc -= hlen;
1097 
1098 	if (nflag)
1099 		printf(" %s", inet_ntoa(from->sin_addr));
1100 	else
1101 		printf(" %s (%s)", inetname(from->sin_addr),
1102 		    inet_ntoa(from->sin_addr));
1103 
1104 	if (verbose)
1105 		printf(" %d bytes to %s", cc, inet_ntoa (ip->ip_dst));
1106 }
1107 
1108 
1109 /*
1110  * Checksum routine for Internet Protocol family headers (C Version)
1111  */
1112 u_short
1113 in_cksum(u_short *addr, int len)
1114 {
1115 	u_short *w = addr, answer;
1116 	int nleft = len, sum = 0;
1117 
1118 	/*
1119 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
1120 	 *  we add sequential 16 bit words to it, and at the end, fold
1121 	 *  back all the carry bits from the top 16 bits into the lower
1122 	 *  16 bits.
1123 	 */
1124 	while (nleft > 1)  {
1125 		sum += *w++;
1126 		nleft -= 2;
1127 	}
1128 
1129 	/* mop up an odd byte, if necessary */
1130 	if (nleft == 1)
1131 		sum += *(u_char *)w;
1132 
1133 	/*
1134 	 * add back carry outs from top 16 bits to low 16 bits
1135 	 */
1136 	sum = (sum >> 16) + (sum & 0xffff);	/* add hi 16 to low 16 */
1137 	sum += (sum >> 16);			/* add carry */
1138 	answer = ~sum;				/* truncate to 16 bits */
1139 	return (answer);
1140 }
1141 
1142 /*
1143  * Construct an Internet address representation.
1144  * If the nflag has been supplied, give
1145  * numeric value, otherwise try for symbolic name.
1146  */
1147 char *
1148 inetname(struct in_addr in)
1149 {
1150 	static char domain[MAXHOSTNAMELEN], line[MAXHOSTNAMELEN];
1151 	static int first = 1;
1152 	struct hostent *hp;
1153 	char *cp;
1154 
1155 	if (first && !nflag) {
1156 		first = 0;
1157 		if (gethostname(domain, sizeof domain) == 0 &&
1158 		    (cp = strchr(domain, '.')) != NULL) {
1159 			strlcpy(domain, cp + 1, sizeof(domain));
1160 		}
1161 	}
1162 	if (!nflag && in.s_addr != INADDR_ANY) {
1163 		hp = gethostbyaddr((char *)&in, sizeof(in), AF_INET);
1164 		if (hp != NULL) {
1165 			if ((cp = strchr(hp->h_name, '.')) != NULL &&
1166 			    strcmp(cp + 1, domain) == 0)
1167 				*cp = '\0';
1168 			strlcpy(line, hp->h_name, sizeof(line));
1169 			return (line);
1170 		}
1171 	}
1172 	return (inet_ntoa(in));
1173 }
1174 
1175 void
1176 usage(void)
1177 {
1178 	fprintf(stderr,
1179 	    "usage: %s [-cdDIlMnrSv] [-f first_ttl] [-g gateway_addr] [-m max_ttl]\n"
1180 	    "\t[-p port] [-P proto] [-q nqueries] [-s src_addr] [-t tos]\n"
1181 	    "\t[-w waittime] host [packetsize]\n", getprogname());
1182 	exit(1);
1183 }
1184