1 /* Copyright: (c) 2009-2010 by Robert David Graham */
2 /****************************************************************************
3
4 PREPROCESS PACKETS
5
6 This function parses the entire TCP/IP stack looking for IP addresses and
7 ports. The intent is that this is the minimal parsing necessary to find
8 address/port information. While it does basic checking (to confirm length
9 information, for example), it does not do more extensive checking (like
10 whether the checksum is correct).
11
12 ****************************************************************************/
13 #include "proto-preprocess.h"
14 #include <assert.h>
15 #include <stdio.h>
16 #include <string.h>
17
/*
 * Byte-order helpers: assemble an integer from consecutive bytes starting
 * at `px`. The "be" variants treat the first byte as most significant, the
 * "le" variants treat it as least significant.
 *
 * Every byte is widened to an unsigned type BEFORE it is shifted. Without
 * that, a byte >= 0x80 shifted left by 24 overflows a signed int, and the
 * resulting negative value would sign-extend when widened to 64 bits.
 *
 * `px` is evaluated more than once; do not pass an expression with side
 * effects.
 */
#define ex32be(px)  (   (uint32_t)*((const unsigned char*)(px)+0)<<24 \
                    |   (uint32_t)*((const unsigned char*)(px)+1)<<16 \
                    |   (uint32_t)*((const unsigned char*)(px)+2)<< 8 \
                    |   (uint32_t)*((const unsigned char*)(px)+3)<< 0 )
#define ex32le(px)  (   (uint32_t)*((const unsigned char*)(px)+0)<< 0 \
                    |   (uint32_t)*((const unsigned char*)(px)+1)<< 8 \
                    |   (uint32_t)*((const unsigned char*)(px)+2)<<16 \
                    |   (uint32_t)*((const unsigned char*)(px)+3)<<24 )
#define ex16be(px)  (   (uint32_t)*((const unsigned char*)(px)+0)<< 8 \
                    |   (uint32_t)*((const unsigned char*)(px)+1)<< 0 )
#define ex16le(px)  (   (uint32_t)*((const unsigned char*)(px)+0)<< 0 \
                    |   (uint32_t)*((const unsigned char*)(px)+1)<< 8 )

#define ex24be(px)  (   (uint32_t)*((const unsigned char*)(px)+0)<<16 \
                    |   (uint32_t)*((const unsigned char*)(px)+1)<< 8 \
                    |   (uint32_t)*((const unsigned char*)(px)+2)<< 0 )
#define ex24le(px)  (   (uint32_t)*((const unsigned char*)(px)+0)<< 0 \
                    |   (uint32_t)*((const unsigned char*)(px)+1)<< 8 \
                    |   (uint32_t)*((const unsigned char*)(px)+2)<<16 )

/* 64-bit values are two 32-bit halves. The little-endian form must be built
 * from little-endian halves, with the first four bytes as the low word. */
#define ex64be(px)  ( (((uint64_t)ex32be(px))<<32) | ((uint64_t)ex32be((px)+4)) )
#define ex64le(px)  ( ((uint64_t)ex32le(px)) | (((uint64_t)ex32le((px)+4))<<32) )
40
/**
 * Call this frequently while parsing through the headers to make sure that
 * we don't go past the end of a packet. Remember that 1 byte past the
 * end can cause a crash.
 *
 * Expects `offset`, `length` and `info` to be in scope at the point of use.
 * If fewer than `n` bytes remain at `offset`, the enclosing function returns
 * 0. Otherwise `info->found_offset` and `info->found` are updated to record
 * the layer `f` that was just validated.
 *
 * The comparison is written as `n > length - offset` rather than
 * `offset + n > length` so that a huge `n` taken from packet contents cannot
 * wrap around and slip past the check. `n` is evaluated exactly once.
 **/
#define VERIFY_REMAINING(n,f) \
    do { \
        const unsigned verify_needed_ = (n); \
        if (offset > length || verify_needed_ > length - offset) \
            return 0; \
        info->found_offset = offset; \
        info->found = (f); \
    } while (0)
47
48
49 /****************************************************************************
50 ****************************************************************************/
51 unsigned
preprocess_frame(const unsigned char * px,unsigned length,unsigned link_type,struct PreprocessedInfo * info)52 preprocess_frame(const unsigned char *px, unsigned length, unsigned link_type,
53 struct PreprocessedInfo *info)
54 {
55 unsigned offset = 0;
56 unsigned ethertype = 0;
57
58 info->transport_offset = 0;
59 info->found = FOUND_NOTHING;
60 info->found_offset = 0;
61
62 /* If not standard Ethernet, go do something else */
63 if (link_type != 1)
64 goto parse_linktype;
65
66 parse_ethernet:
67 VERIFY_REMAINING(14, FOUND_ETHERNET);
68
69 info->mac_dst = px+offset+0;
70 info->mac_src = px+offset+6;
71 ethertype = ex16be(px+offset+12);
72 offset += 14;
73 if (ethertype < 2000)
74 goto parse_llc;
75 if (ethertype != 0x0800)
76 goto parse_ethertype;
77
78 parse_ipv4:
79 {
80 unsigned header_length;
81 unsigned flags;
82 unsigned fragment_offset;
83 unsigned total_length;
84
85 info->ip_offset = offset;
86 VERIFY_REMAINING(20, FOUND_IPV4);
87
88 /* Check version */
89 if ((px[offset]>>4) != 4)
90 return 0; /* not IPv4 or corrupt */
91
92 /* Check header length */
93 header_length = (px[offset] & 0x0F) * 4;
94 VERIFY_REMAINING(header_length, FOUND_IPV4);
95
96 /*TODO: verify checksum */
97
98 /* Check for fragmentation */
99 flags = px[offset+6]&0xE0;
100 fragment_offset = (ex16be(px+offset+6) & 0x3FFF) << 3;
101 if (fragment_offset != 0 || (flags & 0x20))
102 return 0; /* fragmented */
103
104 /* Check for total-length */
105 total_length = ex16be(px+offset+2);
106 VERIFY_REMAINING(total_length, FOUND_IPV4);
107 if (total_length < header_length)
108 return 0; /* weird corruption */
109 length = offset + total_length; /* reduce the max length */
110
111
112 /* Save off pseudo header for checksum calculation */
113 info->ip_version = (px[offset]>>4)&0xF;
114 info->_ip_src = px+offset+12;
115 info->_ip_dst = px+offset+16;
116 info->src_ip.ipv4 = px[offset+12] << 24
117 | px[offset+13] << 16
118 | px[offset+14] << 8
119 | px[offset+15] << 0;
120 info->src_ip.version = 4;
121 info->dst_ip.ipv4 = px[offset+16] << 24
122 | px[offset+17] << 16
123 | px[offset+18] << 8
124 | px[offset+19] << 0;
125 info->dst_ip.version = 4;
126
127 info->ip_ttl = px[offset+8];
128 info->ip_protocol = px[offset+9];
129 info->ip_length = total_length;
130 if (info->ip_version != 4)
131 return 0;
132
133 /* next protocol */
134 offset += header_length;
135 info->transport_offset = offset;
136 info->transport_length = length - info->transport_offset;
137
138 switch (info->ip_protocol) {
139 case 1: goto parse_icmp;
140 case 2: goto parse_igmp;
141 case 6: goto parse_tcp;
142 case 17: goto parse_udp;
143 case 132: goto parse_sctp;
144 default:
145 VERIFY_REMAINING(0, FOUND_OPROTO);
146 return 0; /* todo: should add more protocols, like ICMP */
147 }
148 }
149
150 parse_tcp:
151 {
152 unsigned tcp_length;
153 VERIFY_REMAINING(20, FOUND_TCP);
154 tcp_length = px[offset + 12]>>2;
155 VERIFY_REMAINING(tcp_length, FOUND_TCP);
156 info->port_src = ex16be(px+offset+0);
157 info->port_dst = ex16be(px+offset+2);
158 info->app_offset = offset + tcp_length;
159 info->app_length = length - info->app_offset;
160 //assert(info->app_length < 2000);
161
162 return 1;
163 }
164
165 parse_udp:
166 {
167 VERIFY_REMAINING(8, FOUND_UDP);
168
169 info->port_src = ex16be(px+offset+0);
170 info->port_dst = ex16be(px+offset+2);
171 offset += 8;
172 info->app_offset = offset;
173 info->app_length = length - info->app_offset;
174 assert(info->app_length < 2000);
175
176 if (info->port_dst == 53 || info->port_src == 53) {
177 goto parse_dns;
178 }
179 return 1;
180 }
181
182 parse_icmp:
183 {
184 VERIFY_REMAINING(4, FOUND_ICMP);
185 info->port_src = px[offset+0];
186 info->port_dst = px[offset+1];
187 return 1;
188 }
189
190 parse_igmp:
191 {
192 VERIFY_REMAINING(4, FOUND_IGMP);
193 info->port_src = 0;
194 info->port_dst = px[offset+0];
195 return 1;
196 }
197
198 parse_sctp:
199 {
200 VERIFY_REMAINING(12, FOUND_SCTP);
201 info->port_src = ex16be(px+offset+0);
202 info->port_dst = ex16be(px+offset+2);
203 info->app_offset = offset + 12;
204 info->app_length = length - info->app_offset;
205 assert(info->app_length < 2000);
206 return 1;
207 }
208
209 parse_dns:
210 {
211 VERIFY_REMAINING(8, FOUND_DNS);
212 return 1;
213 }
214
215 parse_ipv6:
216 {
217 unsigned payload_length;
218
219 info->ip_offset = offset;
220 VERIFY_REMAINING(40, FOUND_IPV6);
221
222 /* Check version */
223 if ((px[offset]>>4) != 6)
224 return 0; /* not IPv4 or corrupt */
225
226 /* Payload length */
227 payload_length = ex16be(px+offset+4);
228 VERIFY_REMAINING(40+payload_length, FOUND_IPV6);
229 if (length > offset + 40 + payload_length)
230 length = offset + 40 + payload_length;
231
232 /* Save off pseudo header for checksum calculation */
233 info->ip_version = (px[offset]>>4)&0xF;
234 info->_ip_src = px+offset+8;
235 info->_ip_dst = px+offset+8+16;
236 info->ip_protocol = px[offset+6];
237
238 info->src_ip.version = 6;
239 info->src_ip.ipv6.hi = 0ULL
240 | (uint64_t)px[offset + 8] << 56ULL
241 | (uint64_t)px[offset + 9] << 48ULL
242 | (uint64_t)px[offset + 10] << 40ULL
243 | (uint64_t)px[offset + 11] << 32ULL
244 | (uint64_t)px[offset + 12] << 24ULL
245 | (uint64_t)px[offset + 13] << 16ULL
246 | (uint64_t)px[offset + 14] << 8ULL
247 | (uint64_t)px[offset + 15] << 0ULL;
248 info->src_ip.ipv6.lo = 0ULL
249 | (uint64_t)px[offset + 16] << 56ULL
250 | (uint64_t)px[offset + 17] << 48ULL
251 | (uint64_t)px[offset + 18] << 40ULL
252 | (uint64_t)px[offset + 19] << 32ULL
253 | (uint64_t)px[offset + 20] << 24ULL
254 | (uint64_t)px[offset + 21] << 16ULL
255 | (uint64_t)px[offset + 22] << 8ULL
256 | (uint64_t)px[offset + 23] << 0ULL;
257
258 info->dst_ip.version = 6;
259 info->dst_ip.ipv6.hi = 0ULL
260 | (uint64_t)px[offset + 24] << 56ULL
261 | (uint64_t)px[offset + 25] << 48ULL
262 | (uint64_t)px[offset + 26] << 40ULL
263 | (uint64_t)px[offset + 27] << 32ULL
264 | (uint64_t)px[offset + 28] << 24ULL
265 | (uint64_t)px[offset + 29] << 16ULL
266 | (uint64_t)px[offset + 30] << 8ULL
267 | (uint64_t)px[offset + 31] << 0ULL;
268 info->dst_ip.ipv6.lo = 0ULL
269 | (uint64_t)px[offset + 32] << 56ULL
270 | (uint64_t)px[offset + 33] << 48ULL
271 | (uint64_t)px[offset + 34] << 40ULL
272 | (uint64_t)px[offset + 35] << 32ULL
273 | (uint64_t)px[offset + 36] << 24ULL
274 | (uint64_t)px[offset + 37] << 16ULL
275 | (uint64_t)px[offset + 38] << 8ULL
276 | (uint64_t)px[offset + 39] << 0ULL;
277
278
279
280 /* next protocol */
281 offset += 40;
282 info->transport_offset = offset;
283 info->transport_length = length - info->transport_offset;
284
285 parse_ipv6_next:
286 switch (info->ip_protocol) {
287 case 0: goto parse_ipv6_hop_by_hop;
288 case 6: goto parse_tcp;
289 case 17: goto parse_udp;
290 case 58: goto parse_icmpv6;
291 case 132: goto parse_sctp;
292 case 0x2c: /* IPv6 fragmetn */
293 return 0;
294 default:
295 //printf("***** test me ******\n");
296 return 0; /* todo: should add more protocols, like ICMP */
297 }
298 }
299
300 parse_ipv6_hop_by_hop:
301 {
302 unsigned len;
303
304 VERIFY_REMAINING(8, FOUND_IPV6_HOP);
305 info->ip_protocol = px[offset];
306 len = px[offset+1] + 8;
307
308 VERIFY_REMAINING(len, FOUND_IPV6_HOP);
309 offset += len;
310 info->transport_offset = offset;
311 info->transport_length = length - info->transport_offset;
312 }
313 goto parse_ipv6_next;
314
315 parse_icmpv6:
316 {
317 unsigned icmp_type;
318 unsigned icmp_code;
319
320 VERIFY_REMAINING(4, FOUND_ICMP);
321
322 icmp_type = px[offset+0];
323 icmp_code = px[offset+1];
324
325 info->port_src = icmp_type;
326 info->port_dst = icmp_code;
327
328 if (133 <= icmp_type && icmp_type <= 136) {
329 info->found = FOUND_NDPv6;
330 }
331 }
332 return 1;
333
334 parse_vlan8021q:
335 VERIFY_REMAINING(4, FOUND_8021Q);
336 ethertype = ex16be(px+offset+2);
337 offset += 4;
338 goto parse_ethertype;
339
340 parse_vlanmpls:
341 /* MULTILEVEL:
342 * Regress: wireshark/mpls-twolevel.cap(9)
343 * There can be multiple layers of MPLS tags. This is marked by a
344 * flag in the header whether the current header is the "final"
345 * header in the stack*/
346 while (offset + 4 < length && !(px[offset+2] & 1))
347 offset += 4;
348
349 VERIFY_REMAINING(4, FOUND_MPLS);
350 offset += 4;
351
352 if (px[offset-4+2]&1) {
353 goto parse_ipv4;
354 } else
355 return 0;
356
357
358
359 wifi_data:
360 {
361 unsigned flag;
362 VERIFY_REMAINING(24, FOUND_WIFI_DATA);
363
364 flag = px[offset];
365
366 switch (px[offset+1]&0x03) {
367 case 0:
368 case 2:
369 info->mac_dst = px+offset+4;
370 info->mac_bss = px+offset+10;
371 info->mac_src = px+offset+16;
372 break;
373 case 1:
374 info->mac_bss = px+offset+4;
375 info->mac_src = px+offset+10;
376 info->mac_dst = px+offset+16;
377 break;
378 case 3:
379 info->mac_bss = (const unsigned char*)"\0\0\0\0\0\0";
380 info->mac_dst = px+offset+16;
381 info->mac_src = px+offset+24;
382 offset += 6;
383 break;
384 }
385
386
387 if ((px[offset+1]&0x04) != 0 || (px[offset+22]&0xF) != 0)
388 return 0;
389
390 offset += 24;
391 if (flag == 0x88) {
392 offset += 2;
393 }
394
395 goto parse_llc;
396 }
397
398 parse_wifi:
399 VERIFY_REMAINING(2, FOUND_WIFI);
400 switch (px[offset]) {
401 case 0x08:
402 case 0x88: /* QoS data */
403 if (px[1] & 0x40)
404 return 0;
405 goto wifi_data;
406 break;
407 default:
408 return 0;
409 }
410
411 parse_radiotap_header:
412 /* Radiotap headers for WiFi. http://www.radiotap.org/
413 *
414 * struct ieee80211_radiotap_header {
415 * u_int8_t it_version; // set to 0
416 * u_int8_t it_pad;
417 * u_int16_t it_len; // entire length
418 * u_int32_t it_present; // fields present
419 * };
420 */
421 {
422 unsigned header_length;
423 unsigned features;
424
425 VERIFY_REMAINING(8, FOUND_RADIOTAP);
426 if (px[offset] != 0)
427 return 0;
428 header_length = ex16le(px+offset+2);
429 features = ex32le(px+offset+4);
430
431 VERIFY_REMAINING(header_length, FOUND_RADIOTAP);
432
433 /* If FCS is present at the end of the packet, then change
434 * the length to remove it */
435 if (features & 0x4000) {
436 unsigned fcs_header = ex32le(px+offset+header_length-4);
437 unsigned fcs_frame = ex32le(px+length-4);
438 if (fcs_header == fcs_frame)
439 length -= 4;
440 VERIFY_REMAINING(header_length, FOUND_RADIOTAP);
441 }
442 offset += header_length;
443 goto parse_wifi;
444 }
445
446
447 parse_prism_header:
448 /* DLT_PRISM_HEADER */
449 /* This was original created to handle Prism II cards, but now we see this
450 * from other cards as well, such as the 'madwifi' drivers using Atheros
451 * chipsets.
452 *
453 * This starts with a "TLV" format, a 4-byte little-endian tag, followed by
454 * a 4-byte little-endian length. This TLV should contain the entire Prism
455 * header, after which we'll find the real header. Therefore, we should just
456 * be able to parse the 'length', and skip that many bytes. I'm told it's more
457 * complicated than that, but it seems to work right now, so I'm keeping it
458 * this way.
459 */
460 {
461 unsigned header_length;
462 VERIFY_REMAINING(8, FOUND_PRISM);
463
464 if (ex32le(px+offset+0) != 0x00000044)
465 return 0;
466 header_length = ex32le(px+offset+4);
467 if (header_length > 0xFFFFF)
468 return 0;
469 VERIFY_REMAINING(header_length, FOUND_PRISM);
470 offset += header_length;
471 goto parse_wifi;
472 }
473
474 parse_llc:
475 {
476 unsigned oui;
477
478 VERIFY_REMAINING(3, FOUND_LLC);
479
480 switch (ex24be(px+offset)) {
481 case 0x0000aa: offset += 2; goto parse_llc;
482 default: return 0;
483 case 0xaaaa03: break;
484 }
485
486 offset +=3 ;
487
488 VERIFY_REMAINING(5, FOUND_LLC);
489
490 oui = ex24be(px+offset);
491 ethertype = ex16be(px+offset+3);
492 offset += 5;
493
494 switch (oui){
495 case 0x000000: goto parse_ethertype;
496 default: return 0;
497 }
498
499 }
500
501 parse_ethertype:
502 switch (ethertype) {
503 case 0x0800: goto parse_ipv4;
504 case 0x0806: goto parse_arp;
505 case 0x86dd: goto parse_ipv6;
506 case 0x8100: goto parse_vlan8021q;
507 case 0x8847: goto parse_vlanmpls;
508 default: return 0;
509 }
510
511 parse_linktype:
512 /*
513 * The "link-type" is the same as specified in "libpcap" headers
514 */
515 switch (link_type) {
516 case 0:
517 offset += 4;
518 switch (ex32be(px)) {
519 case 0x02000000:
520 case 0x00000002:
521 goto parse_ipv4;
522 /* Depending on operating system, these can have
523 different values: 24, 28, or 30 */
524 case 0x18000000:
525 case 0x00000018:
526 case 0x1c000000:
527 case 0x0000001c:
528 case 0x1e000000:
529 case 0x0000001e:
530 goto parse_ipv6;
531 }
532 return 0;
533 case 1: goto parse_ethernet;
534 case 12:
535 switch (px[offset]>>4) {
536 case 4: goto parse_ipv4;
537 case 6: goto parse_ipv6;
538 }
539 return 0;
540 case 0x69: goto parse_wifi;
541 case 113: goto parse_linux_sll; /* LINKTYPE_LINUX_SLL DLT_LINUX_SLL */
542 case 119: goto parse_prism_header;
543 case 127: goto parse_radiotap_header;
544 default: return 0;
545 }
546
547 parse_linux_sll:
548 /*
549 +--------+--------+
550 | packet type |
551 +--------+--------+
552 | ARPHRD_ type |
553 +--------+--------+
554 | addr length |
555 +--------+--------+
556 | |
557 + first 8 bytes +
558 | of the |
559 + hardware/MAC +
560 | address |
561 + +
562 | |
563 +--------+--------+
564 | ethertype |
565 +--------+--------+
566 */
567 {
568 struct {
569 unsigned packet_type;
570 unsigned arp_type;
571 unsigned addr_length;
572 unsigned char mac_address[8];
573 unsigned ethertype;
574 } sll;
575
576 VERIFY_REMAINING(16, FOUND_SLL);
577
578 sll.packet_type = ex16be(px+offset+0);
579 sll.arp_type = ex16be(px+offset+2);
580 sll.addr_length = ex16be(px+offset+4);
581 memcpy(sll.mac_address, px+offset+6, 8);
582 sll.ethertype = ex16be(px+offset+14);
583
584 offset += 16;
585
586 goto parse_ethertype;
587 }
588
589 parse_arp:
590 info->ip_version = 256;
591 info->ip_offset = offset;
592 {
593 //unsigned hardware_type;
594 //unsigned protocol_type;
595 unsigned hardware_length;
596 unsigned protocol_length;
597 unsigned opcode;
598
599 VERIFY_REMAINING(8, FOUND_ARP);
600 //hardware_type = px[offset]<<8 | px[offset+1];
601 //protocol_type = px[offset+2]<<8 | px[offset+3];
602 hardware_length = px[offset+4];
603 protocol_length = px[offset+5];
604 opcode = px[offset+6]<<8 | px[offset+7];
605 info->opcode = opcode;
606 info->ip_protocol = opcode;
607 offset += 8;
608
609 VERIFY_REMAINING(2*hardware_length + 2*protocol_length, FOUND_ARP);
610
611 info->_ip_src = px + offset + hardware_length;
612 info->_ip_dst = px + offset + 2*hardware_length + protocol_length;
613
614 info->src_ip.version = 4;
615 info->src_ip.ipv4 = px[offset + hardware_length + 0] << 24
616 | px[offset + hardware_length + 1] << 16
617 | px[offset + hardware_length + 2] << 8
618 | px[offset + hardware_length + 3] << 0;
619 info->dst_ip.version = 4;
620 info->dst_ip.ipv4 = px[offset + 2*hardware_length + protocol_length + 0] << 24
621 | px[offset + 2*hardware_length + protocol_length + 1] << 16
622 | px[offset + 2*hardware_length + protocol_length + 2] << 8
623 | px[offset + 2*hardware_length + protocol_length + 3] << 0;
624
625 info->found_offset = info->ip_offset;
626 return 1;
627 }
628
629 }
630