1 /*	$OpenBSD: traceroute.c,v 1.61 2004/01/26 18:23:51 deraadt Exp $	*/
2 /*	$NetBSD: traceroute.c,v 1.10 1995/05/21 15:50:45 mycroft Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Van Jacobson.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * @(#)traceroute.c	8.1 (Berkeley) 6/6/93
36  */
37 
38 /*
39  * traceroute host  - trace the route ip packets follow going to "host".
40  *
41  * Attempt to trace the route an ip packet would follow to some
42  * internet host.  We find out intermediate hops by launching probe
43  * packets with a small ttl (time to live) then listening for an
44  * icmp "time exceeded" reply from a gateway.  We start our probes
45  * with a ttl of one and increase by one until we get an icmp "port
46  * unreachable" (which means we got to "host") or hit a max (which
47  * defaults to 64 hops & can be changed with the -m flag).  Three
48  * probes (change with -q flag) are sent at each ttl setting and a
49  * line is printed showing the ttl, address of the gateway and
50  * round trip time of each probe.  If the probe answers come from
51  * different gateways, the address of each responding system will
52  * be printed.  If there is no response within a 5 sec. timeout
53  * interval (changed with the -w flag), a "*" is printed for that
54  * probe.
55  *
56  * Probe packets are UDP format.  We don't want the destination
57  * host to process them so the destination port is set to an
58  * unlikely value (if some clod on the destination is using that
59  * value, it can be changed with the -p flag).
60  *
61  * A sample use might be:
62  *
63  *     [yak 71]% traceroute nis.nsf.net.
64  *     traceroute to nis.nsf.net (35.1.1.48), 64 hops max, 56 byte packet
65  *      1  helios.ee.lbl.gov (128.3.112.1)  19 ms  19 ms  0 ms
66  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
67  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
68  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  39 ms
69  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  39 ms  39 ms  39 ms
70  *      6  128.32.197.4 (128.32.197.4)  40 ms  59 ms  59 ms
71  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  59 ms
72  *      8  129.140.70.13 (129.140.70.13)  99 ms  99 ms  80 ms
73  *      9  129.140.71.6 (129.140.71.6)  139 ms  239 ms  319 ms
74  *     10  129.140.81.7 (129.140.81.7)  220 ms  199 ms  199 ms
75  *     11  nic.merit.edu (35.1.1.48)  239 ms  239 ms  239 ms
76  *
77  * Note that lines 2 & 3 are the same.  This is due to a buggy
78  * kernel on the 2nd hop system -- lbl-csam.arpa -- that forwards
79  * packets with a zero ttl.
80  *
81  * A more interesting example is:
82  *
83  *     [yak 72]% traceroute allspice.lcs.mit.edu.
84  *     traceroute to allspice.lcs.mit.edu (18.26.0.115), 64 hops max
85  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
86  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  19 ms  19 ms
87  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  19 ms
88  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  19 ms  39 ms  39 ms
89  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  20 ms  39 ms  39 ms
90  *      6  128.32.197.4 (128.32.197.4)  59 ms  119 ms  39 ms
91  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  39 ms
92  *      8  129.140.70.13 (129.140.70.13)  80 ms  79 ms  99 ms
93  *      9  129.140.71.6 (129.140.71.6)  139 ms  139 ms  159 ms
94  *     10  129.140.81.7 (129.140.81.7)  199 ms  180 ms  300 ms
95  *     11  129.140.72.17 (129.140.72.17)  300 ms  239 ms  239 ms
96  *     12  * * *
97  *     13  128.121.54.72 (128.121.54.72)  259 ms  499 ms  279 ms
98  *     14  * * *
99  *     15  * * *
100  *     16  * * *
101  *     17  * * *
102  *     18  ALLSPICE.LCS.MIT.EDU (18.26.0.115)  339 ms  279 ms  279 ms
103  *
104  * (I start to see why I'm having so much trouble with mail to
105  * MIT.)  Note that the gateways 12, 14, 15, 16 & 17 hops away
106  * either don't send ICMP "time exceeded" messages or send them
107  * with a ttl too small to reach us.  14 - 17 are running the
108  * MIT C Gateway code that doesn't send "time exceeded"s.  God
109  * only knows what's going on with 12.
110  *
111  * The silent gateway 12 in the above may be the result of a bug in
112  * the 4.[23]BSD network code (and its derivatives):  4.x (x <= 3)
113  * sends an unreachable message using whatever ttl remains in the
114  * original datagram.  Since, for gateways, the remaining ttl is
115  * zero, the icmp "time exceeded" is guaranteed to not make it back
116  * to us.  The behavior of this bug is slightly more interesting
117  * when it appears on the destination system:
118  *
119  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
120  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  39 ms
121  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  39 ms  19 ms
122  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  19 ms
123  *      5  ccn-nerif35.Berkeley.EDU (128.32.168.35)  39 ms  39 ms  39 ms
124  *      6  csgw.Berkeley.EDU (128.32.133.254)  39 ms  59 ms  39 ms
125  *      7  * * *
126  *      8  * * *
127  *      9  * * *
128  *     10  * * *
129  *     11  * * *
130  *     12  * * *
131  *     13  rip.Berkeley.EDU (128.32.131.22)  59 ms !  39 ms !  39 ms !
132  *
133  * Notice that there are 12 "gateways" (13 is the final
134  * destination) and exactly the last half of them are "missing".
135  * What's really happening is that rip (a Sun-3 running Sun OS3.5)
136  * is using the ttl from our arriving datagram as the ttl in its
137  * icmp reply.  So, the reply will time out on the return path
138  * (with no notice sent to anyone since icmp's aren't sent for
139  * icmp's) until we probe with a ttl that's at least twice the path
140  * length.  I.e., rip is really only 7 hops away.  A reply that
141  * returns with a ttl of 1 is a clue this problem exists.
142  * Traceroute prints a "!" after the time if the ttl is <= 1.
143  * Since vendors ship a lot of obsolete (DEC's Ultrix, Sun 3.x) or
144  * non-standard (HPUX) software, expect to see this problem
145  * frequently and/or take care picking the target host of your
146  * probes.
147  *
148  * Other possible annotations after the time are !H, !N, !P (got a host,
149  * network or protocol unreachable, respectively), !S or !F (source
150  * route failed or fragmentation needed -- neither of these should
151  * ever occur and the associated gateway is busted if you see one).  If
152  * almost all the probes result in some kind of unreachable, traceroute
153  * will give up and exit.
154  *
155  * Notes
156  * -----
157  * This program must be run by root or be setuid.  (I suggest that
158  * you *don't* make it setuid -- casual use could result in a lot
159  * of unnecessary traffic on our poor, congested nets.)
160  *
161  * This program requires a kernel mod that does not appear in any
162  * system available from Berkeley:  A raw ip socket using proto
163  * IPPROTO_RAW must interpret the data sent as an ip datagram (as
164  * opposed to data to be wrapped in a ip datagram).  See the README
165  * file that came with the source to this program for a description
166  * of the mods I made to /sys/netinet/raw_ip.c.  Your mileage may
167  * vary.  But, again, ANY 4.x (x < 4) BSD KERNEL WILL HAVE TO BE
168  * MODIFIED TO RUN THIS PROGRAM.
169  *
170  * The udp port usage may appear bizarre (well, ok, it is bizarre).
171  * The problem is that an icmp message only contains 8 bytes of
172  * data from the original datagram.  8 bytes is the size of a udp
173  * header so, if we want to associate replies with the original
174  * datagram, the necessary information must be encoded into the
175  * udp header (the ip id could be used but there's no way to
176  * interlock with the kernel's assignment of ip id's and, anyway,
177  * it would have taken a lot more kernel hacking to allow this
178  * code to set the ip id).  So, to allow two or more users to
179  * use traceroute simultaneously, we use this task's pid as the
180  * source port (the high bit is set to move the port number out
181  * of the "likely" range).  To keep track of which probe is being
182  * replied to (so times and/or hop counts don't get confused by a
183  * reply that was delayed in transit), we increment the destination
184  * port number before each probe.
185  *
186  * Don't use this as a coding example.  I was trying to find a
187  * routing problem and this code sort-of popped out after 48 hours
188  * without sleep.  I was amazed it ever compiled, much less ran.
189  *
190  * I stole the idea for this program from Steve Deering.  Since
191  * the first release, I've learned that had I attended the right
192  * IETF working group meetings, I also could have stolen it from Guy
193  * Almes or Matt Mathis.  I don't know (or care) who came up with
194  * the idea first.  I envy the originators' perspicacity and I'm
195  * glad they didn't keep the idea a secret.
196  *
197  * Tim Seaver, Ken Adelman and C. Philip Wood provided bug fixes and/or
198  * enhancements to the original distribution.
199  *
200  * I've hacked up a round-trip-route version of this that works by
201  * sending a loose-source-routed udp datagram through the destination
202  * back to yourself.  Unfortunately, SO many gateways botch source
203  * routing, the thing is almost worthless.  Maybe one day...
204  *
205  *  -- Van Jacobson (van@helios.ee.lbl.gov)
206  *     Tue Dec 20 03:50:13 PST 1988
207  */
208 
209 #include <sys/param.h>
210 #include <sys/time.h>
211 #include <sys/socket.h>
212 #include <sys/file.h>
213 #include <sys/ioctl.h>
214 #include <sys/sysctl.h>
215 
216 #include <netinet/in_systm.h>
217 #include <netinet/in.h>
218 #include <netinet/ip.h>
219 #include <netinet/ip_icmp.h>
220 #include <netinet/ip_var.h>
221 #include <netinet/udp.h>
222 
223 #include <arpa/inet.h>
224 
225 #include <ctype.h>
226 #include <err.h>
227 #include <errno.h>
228 #include <netdb.h>
229 #include <stdio.h>
230 #include <stdlib.h>
231 #include <string.h>
232 #include <unistd.h>
233 
234 #define	MAX_LSRR	((MAX_IPOPTLEN - 4) / 4)
235 
236 /*
237  * Format of the data in a (udp) probe packet.
238  */
239 struct packetdata {
240 	u_char seq;		/* sequence number of this packet */
241 	u_int8_t ttl;		/* ttl packet left with */
242 	u_int32_t sec;		/* time packet left */
243 	u_int32_t usec;
244 };
245 
246 /*
247  * Support for ICMP extensions - RFC4950.
248  */
249 #define ICMP_EXT_OFFSET  8 + 128 /* ICMP type, code, checksum (unused)
250 				  * + original datagram */
251 #define ICMP_EXT_VERSION 2
252 
253 /* ICMP Extension Header according to RFC4884. */
254 #define EXT_VERSION(x)	(((x) & 0xf0000000) >> 28)
255 #define EXT_CHECKSUM(x)	((x) & 0x0000ffff)
256 
257 /*
258  * ICMP extensions, object header
259  */
260 struct icmp_ext_obj_hdr {
261 	u_short length;
262 	u_char  class_num;
263 #define MPLS_STACK_ENTRY_CLASS 1
264 	u_char  c_type;
265 #define MPLS_STACK_ENTRY_C_TYPE 1
266 };
267 
268 /* MPLS Label Stack Object. */
269 #define MPLS_LABEL(x)   (((x) & 0xfffff000) >> 12)
270 #define MPLS_EXP(x)     (((x) & 0x00000e00) >> 9)
271 #define MPLS_STACK(x)   (((x) & 0x00000100) >> 8)
272 #define MPLS_TTL(x)     ((x) & 0x000000ff)
273 
274 static struct in_addr gateway[MAX_LSRR + 1];
275 static int lsrrlen = 0;
276 static int32_t sec_perturb;
277 static int32_t usec_perturb;
278 
279 static u_char packet[512], *outpacket;	/* last inbound (icmp) packet */
280 
281 static void decode_extensions(unsigned char *, int);
282 static void dump_packet(void);
283 static int wait_for_reply(int, struct sockaddr_in *, struct timeval *);
284 static void send_probe(int, u_int8_t, int, struct sockaddr_in *);
285 static int packet_ok(u_char *, int, struct sockaddr_in *, int, int);
286 static const char *pr_type(u_int8_t);
287 static void print(u_char *, int, struct sockaddr_in *);
288 static char *inetname(struct in_addr);
289 static u_short in_cksum(u_short *, int);
290 static void usage(void) __dead2;
291 
292 static int s;			/* receive (icmp) socket file descriptor */
293 static int sndsock;		/* send (udp) socket file descriptor */
294 
295 static int datalen;		/* How much data */
296 static int headerlen;		/* How long packet's header is */
297 
298 static char *source = NULL;
299 static char *hostname;
300 
301 static int nprobes = 3;
302 static u_int8_t max_ttl = IPDEFTTL;
303 static u_int8_t first_ttl = 1;
304 static u_short ident;
305 static u_short port = 32768+666; /* start udp dest port # for probe packets */
306 static u_char	proto = IPPROTO_UDP;
307 static u_int8_t  icmp_type = ICMP_ECHO; /* default ICMP code/type */
308 static u_char  icmp_code = 0;
309 static int options;		/* socket options */
310 static int verbose;
311 static int waittime = 5;	/* time to wait for response (in seconds) */
312 static int nflag;		/* print addresses numerically */
313 static int dump;
314 static int Mflag;		/* show MPLS labels if any */
315 
316 int
main(int argc,char * argv[])317 main(int argc, char *argv[])
318 {
319 	int mib[4] = { CTL_NET, PF_INET, IPPROTO_IP, IPCTL_DEFTTL };
320 	int ttl_flag = 0, incflag = 1, protoset = 0, sump = 0;
321 	int ch, i, lsrr = 0, on = 1, probe, seq = 0, tos = 0;
322 	size_t size = sizeof(max_ttl);
323 	struct sockaddr_in from, to;
324 	struct hostent *hp;
325 	u_int32_t tmprnd;
326 	struct ip *ip;
327 	u_int8_t ttl;
328 	char *ep;
329 	long l;
330 
331 	if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0)
332 		err(5, "icmp socket");
333 	if ((sndsock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0)
334 		err(5, "raw socket");
335 
336 	/* revoke privs */
337 	seteuid(getuid());
338 	setuid(getuid());
339 
340 	sysctl(mib, NELEM(mib), &max_ttl, &size, NULL, 0);
341 
342 	while ((ch = getopt(argc, argv, "SDIdg:f:m:np:q:rs:t:w:vlP:cM")) != -1)
343 		switch (ch) {
344 		case 'S':
345 			sump = 1;
346 			break;
347 		case 'f':
348 			errno = 0;
349 			ep = NULL;
350 			l = strtol(optarg, &ep, 10);
351 			if (errno || !*optarg || *ep || l < 1 || l > max_ttl)
352 				errx(1, "min ttl must be 1 to %u.", max_ttl);
353 			first_ttl = (u_int8_t)l;
354 			break;
355 		case 'c':
356 			incflag = 0;
357 			break;
358 		case 'd':
359 			options |= SO_DEBUG;
360 			break;
361 		case 'D':
362 			dump = 1;
363 			break;
364 		case 'g':
365 			if (lsrr >= MAX_LSRR)
366 				errx(1, "too many gateways; max %d", MAX_LSRR);
367 			if (inet_aton(optarg, &gateway[lsrr]) == 0) {
368 				hp = gethostbyname(optarg);
369 				if (hp == NULL)
370 					errx(1, "unknown host %s", optarg);
371 				memcpy(&gateway[lsrr], hp->h_addr, hp->h_length);
372 			}
373 			if (++lsrr == 1)
374 				lsrrlen = 4;
375 			lsrrlen += 4;
376 			break;
377 		case 'I':
378 			if (protoset)
379 				errx(1, "protocol already set with -P");
380 			protoset = 1;
381 			proto = IPPROTO_ICMP;
382 			break;
383 		case 'l':
384 			ttl_flag++;
385 			break;
386 		case 'm':
387 			errno = 0;
388 			ep = NULL;
389 			l = strtol(optarg, &ep, 10);
390 			if (errno || !*optarg || *ep || l < first_ttl ||
391 			    l > MAXTTL)
392 				errx(1, "max ttl must be %u to %u.", first_ttl,
393 				    MAXTTL);
394 			max_ttl = (u_int8_t)l;
395 			break;
396 		case 'M':
397 			Mflag = 1;
398 			break;
399 		case 'n':
400 			nflag++;
401 			break;
402 		case 'p':
403 			errno = 0;
404 			ep = NULL;
405 			l = strtol(optarg, &ep, 10);
406 			if (errno || !*optarg || *ep || l <= 0 || l >= 65536)
407 				errx(1, "port must be >0, <65536.");
408 			port = (int)l;
409 			break;
410 		case 'P':
411 			if (protoset)
412 				errx(1, "protocol already set with -I");
413 			protoset = 1;
414 			errno = 0;
415 			ep = NULL;
416 			l = strtol(optarg, &ep, 10);
417 			if (errno || !*optarg || *ep || l < 1 ||
418 			    l >= IPPROTO_MAX) {
419 				struct protoent *pent;
420 
421 				pent = getprotobyname(optarg);
422 				if (pent)
423 					proto = pent->p_proto;
424 				else
425 					errx(1, "proto must be >=1, or a name.");
426 			} else
427 				proto = (int)l;
428 			break;
429 		case 'q':
430 			errno = 0;
431 			ep = NULL;
432 			l = strtol(optarg, &ep, 10);
433 			if (errno || !*optarg || *ep || l < 1 || l > INT_MAX)
434 				errx(1, "nprobes must be >0.");
435 			nprobes = (int)l;
436 			break;
437 		case 'r':
438 			options |= SO_DONTROUTE;
439 			break;
440 		case 's':
441 			/*
442 			 * set the ip source address of the outbound
443 			 * probe (e.g., on a multi-homed host).
444 			 */
445 			source = optarg;
446 			break;
447 		case 't':
448 			errno = 0;
449 			ep = NULL;
450 			l = strtol(optarg, &ep, 10);
451 			if (errno || !*optarg || *ep || l < 0 || l > 255)
452 				errx(1, "tos must be 0 to 255.");
453 			tos = (int)l;
454 			break;
455 		case 'v':
456 			verbose++;
457 			break;
458 		case 'w':
459 			errno = 0;
460 			ep = NULL;
461 			l = strtol(optarg, &ep, 10);
462 			if (errno || !*optarg || *ep || l <= 1 || l > INT_MAX)
463 				errx(1, "wait must be >1 sec.");
464 			waittime = (int)l;
465 			break;
466 		default:
467 			usage();
468 		}
469 	argc -= optind;
470 	argv += optind;
471 
472 	if (argc < 1)
473 		usage();
474 
475 	setlinebuf (stdout);
476 
477 	memset(&to, 0, sizeof(struct sockaddr));
478 	to.sin_family = AF_INET;
479 	if (inet_aton(*argv, &to.sin_addr) != 0)
480 		hostname = *argv;
481 	else {
482 		hp = gethostbyname(*argv);
483 		if (hp == NULL)
484 			errx(1, "unknown host %s", *argv);
485 		to.sin_family = hp->h_addrtype;
486 		memcpy(&to.sin_addr, hp->h_addr, hp->h_length);
487 		if ((hostname = strdup(hp->h_name)) == NULL)
488 			err(1, "malloc");
489 		if (hp->h_addr_list[1] != NULL)
490 			warnx("Warning: %s has multiple addresses; using %s",
491 			    hostname, inet_ntoa(to.sin_addr));
492 	}
493 	if (*++argv) {
494 		errno = 0;
495 		ep = NULL;
496 		l = strtol(*argv, &ep, 10);
497 		if (errno || !*argv || *ep || l < 0 || l > INT_MAX)
498 			errx(1, "datalen out of range");
499 		datalen = (int)l;
500 	}
501 
502 	switch (proto) {
503 	case IPPROTO_UDP:
504 		headerlen = (sizeof(struct ip) + lsrrlen +
505 		    sizeof(struct udphdr) + sizeof(struct packetdata));
506 		break;
507 	case IPPROTO_ICMP:
508 		headerlen = (sizeof(struct ip) + lsrrlen +
509 		    sizeof(struct icmp) + sizeof(struct packetdata));
510 		break;
511 	default:
512 		headerlen = (sizeof(struct ip) + lsrrlen +
513 		    sizeof(struct packetdata));
514 	}
515 
516 	if (datalen < 0 || datalen > IP_MAXPACKET - headerlen)
517 		errx(1, "packet size must be 0 to %d.",
518 		    IP_MAXPACKET - headerlen);
519 
520 	datalen += headerlen;
521 
522 	outpacket = (u_char *)malloc(datalen);
523 	if (outpacket == NULL)
524 		err(1, "malloc");
525 	memset(outpacket, 0, datalen);
526 
527 	ip = (struct ip *)outpacket;
528 	if (lsrr != 0) {
529 		u_char *p = (u_char *)(ip + 1);
530 
531 		*p++ = IPOPT_NOP;
532 		*p++ = IPOPT_LSRR;
533 		*p++ = lsrrlen - 1;
534 		*p++ = IPOPT_MINOFF;
535 		gateway[lsrr] = to.sin_addr;
536 		for (i = 1; i <= lsrr; i++) {
537 			memcpy(p, &gateway[i], sizeof(struct in_addr));
538 			p += sizeof(struct in_addr);
539 		}
540 		ip->ip_dst = gateway[0];
541 	} else
542 		ip->ip_dst = to.sin_addr;
543 	ip->ip_off = htons(0);
544 	ip->ip_hl = (sizeof(struct ip) + lsrrlen) >> 2;
545 	ip->ip_p = proto;
546 	ip->ip_v = IPVERSION;
547 	ip->ip_tos = tos;
548 
549 	ident = (getpid() & 0xffff) | 0x8000;
550 	tmprnd = arc4random();
551 	sec_perturb = (tmprnd & 0x80000000) ? -(tmprnd & 0x7ff) :
552 	    (tmprnd & 0x7ff);
553 	usec_perturb = arc4random();
554 
555 	if (options & SO_DEBUG)
556 		setsockopt(s, SOL_SOCKET, SO_DEBUG, (char *)&on, sizeof(on));
557 #ifdef SO_SNDBUF
558 	if (setsockopt(sndsock, SOL_SOCKET, SO_SNDBUF, (char *)&datalen,
559 	    sizeof(datalen)) < 0)
560 		err(6, "SO_SNDBUF");
561 #endif /* SO_SNDBUF */
562 #ifdef IP_HDRINCL
563 	if (setsockopt(sndsock, IPPROTO_IP, IP_HDRINCL, (char *)&on,
564 	    sizeof(on)) < 0)
565 		err(6, "IP_HDRINCL");
566 #endif /* IP_HDRINCL */
567 	if (options & SO_DEBUG)
568 		setsockopt(sndsock, SOL_SOCKET, SO_DEBUG,
569 		    (char *)&on, sizeof(on));
570 	if (options & SO_DONTROUTE)
571 		setsockopt(sndsock, SOL_SOCKET, SO_DONTROUTE,
572 		    (char *)&on, sizeof(on));
573 
574 	if (source) {
575 		memset(&from, 0, sizeof(struct sockaddr));
576 		from.sin_family = AF_INET;
577 		if (inet_aton(source, &from.sin_addr) == 0)
578 			errx(1, "unknown host %s", source);
579 		ip->ip_src = from.sin_addr;
580 		if (getuid() != 0 &&
581 		    (ntohl(from.sin_addr.s_addr) & 0xff000000U) == 0x7f000000U &&
582 		    (ntohl(to.sin_addr.s_addr) & 0xff000000U) != 0x7f000000U)
583 			errx(1, "source is on 127/8, destination is not");
584 
585 		if (getuid() &&
586 		    bind(sndsock, (struct sockaddr *)&from, sizeof(from)) < 0)
587 			err(1, "bind");
588 	}
589 
590 	fprintf(stderr, "traceroute to %s (%s)", hostname,
591 		inet_ntoa(to.sin_addr));
592 	if (source)
593 		fprintf(stderr, " from %s", source);
594 	fprintf(stderr, ", %u hops max, %d byte packets\n", max_ttl, datalen);
595 	fflush(stderr);
596 
597 	if (first_ttl > 1)
598 		printf("Skipping %u intermediate hops\n", first_ttl - 1);
599 
600 	for (ttl = first_ttl; ttl <= max_ttl; ++ttl) {
601 		int got_there = 0, unreachable = 0, timeout = 0, loss;
602 		int gotlastaddr = 0;
603 		in_addr_t lastaddr = 0;
604 		quad_t dt;
605 
606 		printf("%2u ", ttl);
607 		for (probe = 0, loss = 0; probe < nprobes; ++probe) {
608 			int cc;
609 			struct timeval t1, t2;
610 			int code;
611 
612 			gettimeofday(&t1, NULL);
613 			send_probe(++seq, ttl, incflag, &to);
614 			while ((cc = wait_for_reply(s, &from, &t1))) {
615 				gettimeofday(&t2, NULL);
616 				if (t2.tv_sec - t1.tv_sec > waittime) {
617 					cc = 0;
618 					break;
619 				}
620 				i = packet_ok(packet, cc, &from, seq, incflag);
621 				/* Skip short packet */
622 				if (i == 0)
623 					continue;
624 				if (!gotlastaddr ||
625 				    from.sin_addr.s_addr != lastaddr) {
626 					if (gotlastaddr)
627 						printf("\n   ");
628 					print(packet, cc, &from);
629 					lastaddr = from.sin_addr.s_addr;
630 					++gotlastaddr;
631 				}
632 				dt = (quad_t)(t2.tv_sec - t1.tv_sec) * 1000000 +
633 				    (quad_t)(t2.tv_usec - t1.tv_usec);
634 				printf("  %u", (u_int)(dt / 1000));
635 				if (dt % 1000)
636 					printf(".%u", (u_int)(dt % 1000));
637 				printf(" ms");
638 				ip = (struct ip *)packet;
639 				if (ttl_flag)
640 					printf(" (%u)", ip->ip_ttl);
641 				if (i == -2) {
642 #ifndef ARCHAIC
643 					ip = (struct ip *)packet;
644 					if (ip->ip_ttl <= 1)
645 						printf(" !");
646 #endif
647 					++got_there;
648 					break;
649 				}
650 				/* time exceeded in transit */
651 				if (i == -1)
652 					break;
653 				code = i - 1;
654 				switch (code) {
655 				case ICMP_UNREACH_PORT:
656 #ifndef ARCHAIC
657 					ip = (struct ip *)packet;
658 					if (ip->ip_ttl <= 1)
659 						printf(" !");
660 #endif /* ARCHAIC */
661 					++got_there;
662 					break;
663 				case ICMP_UNREACH_NET:
664 					++unreachable;
665 					printf(" !N");
666 					break;
667 				case ICMP_UNREACH_HOST:
668 					++unreachable;
669 					printf(" !H");
670 					break;
671 				case ICMP_UNREACH_PROTOCOL:
672 					++got_there;
673 					printf(" !P");
674 					break;
675 				case ICMP_UNREACH_NEEDFRAG:
676 					++unreachable;
677 					printf(" !F");
678 					break;
679 				case ICMP_UNREACH_SRCFAIL:
680 					++unreachable;
681 					printf(" !S");
682 					break;
683 				case ICMP_UNREACH_FILTER_PROHIB:
684 					++unreachable;
685 					printf(" !X");
686 					break;
687 				case ICMP_UNREACH_NET_PROHIB: /*misuse*/
688 					++unreachable;
689 					printf(" !A");
690 					break;
691 				case ICMP_UNREACH_HOST_PROHIB:
692 					++unreachable;
693 					printf(" !C");
694 					break;
695 				case ICMP_UNREACH_NET_UNKNOWN:
696 				case ICMP_UNREACH_HOST_UNKNOWN:
697 					++unreachable;
698 					printf(" !U");
699 					break;
700 				case ICMP_UNREACH_ISOLATED:
701 					++unreachable;
702 					printf(" !I");
703 					break;
704 				case ICMP_UNREACH_TOSNET:
705 				case ICMP_UNREACH_TOSHOST:
706 					++unreachable;
707 					printf(" !T");
708 					break;
709 				default:
710 					++unreachable;
711 					printf(" !<%d>", i - 1);
712 					break;
713 				}
714 				break;
715 			}
716 			if (cc == 0) {
717 				printf(" *");
718 				timeout++;
719 				loss++;
720 			}
721 			else if (cc && probe == nprobes - 1 && Mflag)
722 				decode_extensions(packet, cc);
723 			fflush(stdout);
724 		}
725 		if (sump)
726 			printf(" (%d%% loss)", (loss * 100) / nprobes);
727 		putchar('\n');
728 		if (got_there || (unreachable && (unreachable + timeout) >= nprobes))
729 			break;
730 	}
731 	exit(0);
732 }
733 
734 static int
wait_for_reply(int sock,struct sockaddr_in * from,struct timeval * sent)735 wait_for_reply(int sock, struct sockaddr_in *from, struct timeval *sent)
736 {
737 	socklen_t fromlen = sizeof (*from);
738 	struct timeval now, wait;
739 	int cc = 0, fdsn;
740 	fd_set *fdsp;
741 
742 	fdsn = howmany(sock+1, NFDBITS) * sizeof(fd_mask);
743 	if ((fdsp = (fd_set *)malloc(fdsn)) == NULL)
744 		err(1, "malloc");
745 	memset(fdsp, 0, fdsn);
746 	FD_SET(sock, fdsp);
747 	gettimeofday(&now, NULL);
748 	wait.tv_sec = (sent->tv_sec + waittime) - now.tv_sec;
749 	wait.tv_usec =  sent->tv_usec - now.tv_usec;
750 	if (wait.tv_usec < 0) {
751 		wait.tv_usec += 1000000;
752 		wait.tv_sec--;
753 	}
754 	if (wait.tv_sec < 0)
755 		wait.tv_sec = wait.tv_usec = 0;
756 
757 	if (select(sock+1, fdsp, NULL, NULL, &wait) > 0)
758 		cc = recvfrom(s, (char *)packet, sizeof(packet), 0,
759 		    (struct sockaddr *)from, &fromlen);
760 
761 	free(fdsp);
762 	return (cc);
763 }
764 
765 static void
decode_extensions(unsigned char * buf,int ip_len)766 decode_extensions(unsigned char *buf, int ip_len)
767 {
768 	 uint32_t *cmn_hdr;
769 	 struct icmp_ext_obj_hdr *obj_hdr;
770 	 uint32_t mpls_hdr;
771 	 int data_len, obj_len;
772 	 struct ip *ip;
773 
774 	 ip = (struct ip *)buf;
775 
776 	 if (ip_len <= (int)(sizeof(struct ip) + ICMP_EXT_OFFSET)) {
777 		/*
778 		 * No support for ICMP extensions on this host
779 		 */
780 		return;
781 	 }
782 
783 	 /*
784 	  * Move forward to the start of the ICMP extensions, if present
785 	  */
786 	 buf += (ip->ip_hl << 2) + ICMP_EXT_OFFSET;
787 	 cmn_hdr = (uint32_t *)buf;
788 
789 	 if (EXT_VERSION(ntohl(*cmn_hdr)) != ICMP_EXT_VERSION) {
790 		/*
791 		 * Unknown version
792 		 */
793 		return;
794 	 }
795 
796 	 data_len = ip_len - ((u_char *)cmn_hdr - (u_char *)ip);
797 
798 	 /*
799 	  * Check the checksum, cmn_hdr->checksum == 0 means no checksum'ing
800 	  * done by sender.
801 	  *
802 	  * If the checksum is ok, we'll get 0, as the checksum is calculated
803 	  * with the checksum field being 0'd.
804 	  */
805 	 if (EXT_CHECKSUM(ntohl(*cmn_hdr)) &&
806 	     in_cksum((u_short *)cmn_hdr, data_len)) {
807 		return;
808 	 }
809 
810 	 buf += sizeof(*cmn_hdr);
811 	 data_len -= sizeof(*cmn_hdr);
812 
813 	 while (data_len >= (int)sizeof(struct icmp_ext_obj_hdr)) {
814 		unsigned char *nextbuf;
815 
816 		obj_hdr = (struct icmp_ext_obj_hdr *)buf;
817 		obj_len = ntohs(obj_hdr->length);
818 
819 		/*
820 		 * Sanity check the length field
821 		 */
822 		if (obj_len < (int)sizeof(*obj_hdr) || obj_len > data_len)
823 			return;
824 
825 		/* Object has to be 4-byte aligned. */
826 		if (obj_len & 3)
827 			return;
828 
829 		nextbuf = buf + obj_len;
830 		data_len -= obj_len;
831 
832 		/*
833 		 * Move past the object header
834 		 */
835 		buf += sizeof(struct icmp_ext_obj_hdr);
836 		obj_len -= sizeof(struct icmp_ext_obj_hdr);
837 
838 		switch (obj_hdr->class_num) {
839 		case MPLS_STACK_ENTRY_CLASS:
840 			switch (obj_hdr->c_type) {
841 			case MPLS_STACK_ENTRY_C_TYPE:
842 				while (obj_len >= (int)sizeof(uint32_t)) {
843 					mpls_hdr = ntohl(*(uint32_t *)buf);
844 
845 					buf += sizeof(uint32_t);
846 					obj_len -= sizeof(uint32_t);
847 					printf(" [MPLS: Label %d Exp %d]",
848 					    MPLS_LABEL(mpls_hdr),
849 					    MPLS_EXP(mpls_hdr));
850 				}
851 				if (obj_len > 0) {
852 					/*
853 					 * Something went wrong, and we're at
854 					 * a unknown offset into the packet,
855 					 * ditch the rest of it.
856 					 */
857 					return;
858 				}
859 				break;
860 			default:
861 				/*
862 				 * Unknown object, skip past it
863 				 */
864 				buf = nextbuf;
865 				break;
866 			}
867 			break;
868 
869 		default:
870 			/*
871 			 * Unknown object, skip past it
872 			 */
873 			buf = nextbuf;
874 			break;
875 		}
876 	}
877 }
878 
879 static void
dump_packet(void)880 dump_packet(void)
881 {
882 	u_char *p;
883 	int i;
884 
885 	fprintf(stderr, "packet data:");
886 	for (p = outpacket, i = 0; i < datalen; i++) {
887 		if ((i % 24) == 0)
888 			fprintf(stderr, "\n ");
889 		fprintf(stderr, " %02x", *p++);
890 	}
891 	fprintf(stderr, "\n");
892 }
893 
894 static void
send_probe(int seq,u_int8_t ttl,int iflag,struct sockaddr_in * to)895 send_probe(int seq, u_int8_t ttl, int iflag, struct sockaddr_in *to)
896 {
897 	struct ip *ip = (struct ip *)outpacket;
898 	u_char *p = (u_char *)(ip + 1);
899 	struct udphdr *up = (struct udphdr *)(p + lsrrlen);
900 	struct icmp *icmpp = (struct icmp *)(p + lsrrlen);
901 	struct packetdata *op;
902 	struct timeval tv;
903 	int i;
904 
905 	ip->ip_len = datalen;
906 	ip->ip_ttl = ttl;
907 	ip->ip_id = htons(ident+seq);
908 
909 	switch (proto) {
910 	case IPPROTO_ICMP:
911 		icmpp->icmp_type = icmp_type;
912 		icmpp->icmp_code = icmp_code;
913 		icmpp->icmp_seq = htons(seq);
914 		icmpp->icmp_id = htons(ident);
915 		op = (struct packetdata *)(icmpp + 1);
916 		break;
917 	case IPPROTO_UDP:
918 		up->uh_sport = htons(ident);
919 		if (iflag)
920 			up->uh_dport = htons(port+seq);
921 		else
922 			up->uh_dport = htons(port);
923 		up->uh_ulen = htons((u_short)(datalen - sizeof(struct ip) -
924 		    lsrrlen));
925 		up->uh_sum = 0;
926 		op = (struct packetdata *)(up + 1);
927 		break;
928 	default:
929 		op = (struct packetdata *)(ip + 1);
930 		break;
931 	}
932 	op->seq = seq;
933 	op->ttl = ttl;
934 
935 	/*
936 	 * We don't want hostiles snooping the net to get any useful
937 	 * information about us. Send the timestamp in network byte order,
938 	 * and perturb the timestamp enough that they won't know our
939 	 * real clock ticker. We don't want to perturb the time by too
940 	 * much: being off by a suspiciously large amount might indicate
941 	 * OpenBSD.
942 	 *
943 	 * The timestamps in the packet are currently unused. If future
944 	 * work wants to use them they will have to subtract out the
945 	 * perturbation first.
946 	 */
947 	gettimeofday(&tv, NULL);
948 	op->sec = htonl(tv.tv_sec + sec_perturb);
949 	op->usec = htonl((tv.tv_usec + usec_perturb) % 1000000);
950 
951 	if (proto == IPPROTO_ICMP && icmp_type == ICMP_ECHO) {
952 		icmpp->icmp_cksum = 0;
953 		icmpp->icmp_cksum = in_cksum((u_short *)icmpp,
954 		    datalen - sizeof(struct ip) - lsrrlen);
955 		if (icmpp->icmp_cksum == 0)
956 			icmpp->icmp_cksum = 0xffff;
957 	}
958 
959 	if (dump)
960 		dump_packet();
961 
962 	i = sendto(sndsock, outpacket, datalen, 0, (struct sockaddr *)to,
963 	    sizeof(struct sockaddr_in));
964 	if (i < 0 || i != datalen)  {
965 		if (i < 0)
966 			perror("sendto");
967 		printf("traceroute: wrote %s %d chars, ret=%d\n", hostname,
968 		    datalen, i);
969 		fflush(stdout);
970 	}
971 }
972 
973 static const char *ttab[] = {
974 	"Echo Reply",
975 	"ICMP 1",
976 	"ICMP 2",
977 	"Dest Unreachable",
978 	"Source Quench",
979 	"Redirect",
980 	"ICMP 6",
981 	"ICMP 7",
982 	"Echo",
983 	"Router Advert",
984 	"Router Solicit",
985 	"Time Exceeded",
986 	"Param Problem",
987 	"Timestamp",
988 	"Timestamp Reply",
989 	"Info Request",
990 	"Info Reply",
991 	"Mask Request",
992 	"Mask Reply"
993 };
994 
995 /*
996  * Convert an ICMP "type" field to a printable string.
997  */
998 static const char *
pr_type(u_int8_t t)999 pr_type(u_int8_t t)
1000 {
1001 	if (t > 18)
1002 		return ("OUT-OF-RANGE");
1003 	return (ttab[t]);
1004 }
1005 
1006 static int
packet_ok(u_char * buf,int cc,struct sockaddr_in * from,int seq,int iflag)1007 packet_ok(u_char *buf, int cc, struct sockaddr_in *from, int seq, int iflag)
1008 {
1009 	struct icmp *icp;
1010 	u_char code;
1011 	u_int8_t type;
1012 	int hlen;
1013 #ifndef ARCHAIC
1014 	struct ip *ip;
1015 
1016 	ip = (struct ip *) buf;
1017 	hlen = ip->ip_hl << 2;
1018 	if (cc < hlen + ICMP_MINLEN) {
1019 		if (verbose)
1020 			printf("packet too short (%d bytes) from %s\n", cc,
1021 			    inet_ntoa(from->sin_addr));
1022 		return (0);
1023 	}
1024 	cc -= hlen;
1025 	icp = (struct icmp *)(buf + hlen);
1026 #else
1027 	icp = (struct icmp *)buf;
1028 #endif /* ARCHAIC */
1029 	type = icp->icmp_type;
1030 	code = icp->icmp_code;
1031 	if ((type == ICMP_TIMXCEED && code == ICMP_TIMXCEED_INTRANS) ||
1032 	    type == ICMP_UNREACH || type == ICMP_ECHOREPLY) {
1033 		struct ip *hip;
1034 		struct udphdr *up;
1035 		struct icmp *icmpp;
1036 
1037 		hip = &icp->icmp_ip;
1038 		hlen = hip->ip_hl << 2;
1039 
1040 		switch (proto) {
1041 		case IPPROTO_ICMP:
1042 			if (icmp_type == ICMP_ECHO &&
1043 			    type == ICMP_ECHOREPLY &&
1044 			    icp->icmp_id == htons(ident) &&
1045 			    icp->icmp_seq == htons(seq))
1046 				return (-2); /* we got there */
1047 
1048 			icmpp = (struct icmp *)((u_char *)hip + hlen);
1049 			if (hlen + 8 <= cc && hip->ip_p == IPPROTO_ICMP &&
1050 			    icmpp->icmp_id == htons(ident) &&
1051 			    icmpp->icmp_seq == htons(seq))
1052 				return (type == ICMP_TIMXCEED? -1 : code + 1);
1053 			break;
1054 
1055 		case IPPROTO_UDP:
1056 			up = (struct udphdr *)((u_char *)hip + hlen);
1057 			if (hlen + 12 <= cc && hip->ip_p == proto &&
1058 			    up->uh_sport == htons(ident) &&
1059 			    ((iflag && up->uh_dport == htons(port + seq)) ||
1060 			    (!iflag && up->uh_dport == htons(port))))
1061 				return (type == ICMP_TIMXCEED? -1 : code + 1);
1062 			break;
1063 		default:
1064 			/* this is some odd, user specified proto,
1065 			 * how do we check it?
1066 			 */
1067 			if (hip->ip_p == proto)
1068 				return (type == ICMP_TIMXCEED? -1 : code + 1);
1069 		}
1070 	}
1071 #ifndef ARCHAIC
1072 	if (verbose) {
1073 		int i;
1074 		in_addr_t *lp = (in_addr_t *)&icp->icmp_ip;
1075 
1076 		printf("\n%d bytes from %s", cc, inet_ntoa(from->sin_addr));
1077 		printf(" to %s", inet_ntoa(ip->ip_dst));
1078 		printf(": icmp type %u (%s) code %d\n", type, pr_type(type),
1079 		    icp->icmp_code);
1080 		for (i = 4; i < cc ; i += sizeof(in_addr_t))
1081 			printf("%2d: x%8.8lx\n", i, (unsigned long)*lp++);
1082 	}
1083 #endif /* ARCHAIC */
1084 	return (0);
1085 }
1086 
1087 static void
print(u_char * buf,int cc,struct sockaddr_in * from)1088 print(u_char *buf, int cc, struct sockaddr_in *from)
1089 {
1090 	struct ip *ip;
1091 	int hlen;
1092 
1093 	ip = (struct ip *) buf;
1094 	hlen = ip->ip_hl << 2;
1095 	cc -= hlen;
1096 
1097 	if (nflag)
1098 		printf(" %s", inet_ntoa(from->sin_addr));
1099 	else
1100 		printf(" %s (%s)", inetname(from->sin_addr),
1101 		    inet_ntoa(from->sin_addr));
1102 
1103 	if (verbose)
1104 		printf(" %d bytes to %s", cc, inet_ntoa (ip->ip_dst));
1105 }
1106 
1107 
1108 /*
1109  * Checksum routine for Internet Protocol family headers (C Version)
1110  */
1111 static u_short
in_cksum(u_short * addr,int len)1112 in_cksum(u_short *addr, int len)
1113 {
1114 	u_short *w = addr, answer;
1115 	int nleft = len, sum = 0;
1116 
1117 	/*
1118 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
1119 	 *  we add sequential 16 bit words to it, and at the end, fold
1120 	 *  back all the carry bits from the top 16 bits into the lower
1121 	 *  16 bits.
1122 	 */
1123 	while (nleft > 1)  {
1124 		sum += *w++;
1125 		nleft -= 2;
1126 	}
1127 
1128 	/* mop up an odd byte, if necessary */
1129 	if (nleft == 1)
1130 		sum += *(u_char *)w;
1131 
1132 	/*
1133 	 * add back carry outs from top 16 bits to low 16 bits
1134 	 */
1135 	sum = (sum >> 16) + (sum & 0xffff);	/* add hi 16 to low 16 */
1136 	sum += (sum >> 16);			/* add carry */
1137 	answer = ~sum;				/* truncate to 16 bits */
1138 	return (answer);
1139 }
1140 
1141 /*
1142  * Construct an Internet address representation.
1143  * If the nflag has been supplied, give
1144  * numeric value, otherwise try for symbolic name.
1145  */
1146 static char *
inetname(struct in_addr in)1147 inetname(struct in_addr in)
1148 {
1149 	static char domain[MAXHOSTNAMELEN], line[MAXHOSTNAMELEN];
1150 	static int first = 1;
1151 	struct hostent *hp;
1152 	char *cp;
1153 
1154 	if (first && !nflag) {
1155 		first = 0;
1156 		if (gethostname(domain, sizeof domain) == 0 &&
1157 		    (cp = strchr(domain, '.')) != NULL) {
1158 			strlcpy(domain, cp + 1, sizeof(domain));
1159 		}
1160 	}
1161 	if (!nflag && in.s_addr != INADDR_ANY) {
1162 		hp = gethostbyaddr(&in, sizeof(in), AF_INET);
1163 		if (hp != NULL) {
1164 			if ((cp = strchr(hp->h_name, '.')) != NULL &&
1165 			    strcmp(cp + 1, domain) == 0)
1166 				*cp = '\0';
1167 			strlcpy(line, hp->h_name, sizeof(line));
1168 			return (line);
1169 		}
1170 	}
1171 	return (inet_ntoa(in));
1172 }
1173 
1174 static void
usage(void)1175 usage(void)
1176 {
1177 	fprintf(stderr,
1178 	    "usage: %s [-cdDIlMnrSv] [-f first_ttl] [-g gateway_addr] [-m max_ttl]\n"
1179 	    "\t[-p port] [-P proto] [-q nqueries] [-s src_addr] [-t tos]\n"
1180 	    "\t[-w waittime] host [packetsize]\n", getprogname());
1181 	exit(1);
1182 }
1183