1 /* Copyright: (c) 2009-2010 by Robert David Graham */
2 /****************************************************************************
3 
4         PREPROCESS PACKETS
5 
6   This function parses the entire TCP/IP stack looking for IP addresses and
7   ports. The intent is that this is the minimal parsing necessary to find
8   address/port information. While it does basic checking (to confirm length
9   information, for example), it does not do more extensive checking (like
10   whether the checksum is correct).
11 
12  ****************************************************************************/
13 #include "proto-preprocess.h"
14 #include <assert.h>
15 #include <stdio.h>
16 #include <string.h>
17 
/*
 * Byte-order extraction helpers.
 *
 * Each macro reads 2, 3, 4 or 8 consecutive bytes starting at `px` and
 * assembles them into an integer, either big-endian ("be") or
 * little-endian ("le"). No alignment is required because the bytes are
 * read one at a time.
 *
 * Every byte is converted to `unsigned` before it is shifted. Without that
 * conversion a byte >= 0x80 shifted left by 24 overflows a signed `int`
 * (undefined behavior), and the resulting negative value would be
 * sign-extended when widened to 64 bits.
 *
 * The argument is evaluated more than once, so it must not have side
 * effects.
 */
#define ex32be(px)  (   (unsigned)((const unsigned char*)(px))[0]<<24 \
                    |   (unsigned)((const unsigned char*)(px))[1]<<16 \
                    |   (unsigned)((const unsigned char*)(px))[2]<< 8 \
                    |   (unsigned)((const unsigned char*)(px))[3]<< 0 )
#define ex32le(px)  (   (unsigned)((const unsigned char*)(px))[0]<< 0 \
                    |   (unsigned)((const unsigned char*)(px))[1]<< 8 \
                    |   (unsigned)((const unsigned char*)(px))[2]<<16 \
                    |   (unsigned)((const unsigned char*)(px))[3]<<24 )
#define ex16be(px)  (   (unsigned)((const unsigned char*)(px))[0]<< 8 \
                    |   (unsigned)((const unsigned char*)(px))[1]<< 0 )
#define ex16le(px)  (   (unsigned)((const unsigned char*)(px))[0]<< 0 \
                    |   (unsigned)((const unsigned char*)(px))[1]<< 8 )

#define ex24be(px)  (   (unsigned)((const unsigned char*)(px))[0]<<16 \
                    |   (unsigned)((const unsigned char*)(px))[1]<< 8 \
                    |   (unsigned)((const unsigned char*)(px))[2]<< 0 )
#define ex24le(px)  (   (unsigned)((const unsigned char*)(px))[0]<< 0 \
                    |   (unsigned)((const unsigned char*)(px))[1]<< 8 \
                    |   (unsigned)((const unsigned char*)(px))[2]<<16 )

/* 64-bit values are composed from two 32-bit halves of the SAME byte order:
 * big-endian puts the first four bytes in the high half, little-endian puts
 * them in the low half. */
#define ex64be(px)  (   (((uint64_t)ex32be(px))<<32) \
                    |   ((uint64_t)ex32be((const unsigned char*)(px)+4)) )
#define ex64le(px)  (   ((uint64_t)ex32le(px)) \
                    |   (((uint64_t)ex32le((const unsigned char*)(px)+4))<<32) )
40 
/**
 *  Call this frequently while parsing through the headers to make sure that
 *  we don't go past the end of a packet. Remember that 1 byte past the
 *  end can cause a crash.
 *
 *  The macro relies on three names being in scope at the point of use:
 *  `offset` (current parse position), `length` (end of valid data) and
 *  `info` (the result structure).
 *
 *  If fewer than `n` bytes remain at `offset`, the ENCLOSING FUNCTION
 *  returns 0. Otherwise `info->found_offset` and `info->found` are updated
 *  to record the layer `f` that has just been verified.
 *
 *  The comparison is written as a subtraction on the right-hand side so
 *  that `offset + n` cannot wrap around, and the whole thing is wrapped in
 *  do/while(0) so it behaves as a single statement (safe inside an
 *  unbraced if/else).
 **/
#define VERIFY_REMAINING(n,f) \
    do { \
        if (offset > length || (n) > length - offset) \
            return 0; \
        info->found_offset = offset; \
        info->found = (f); \
    } while (0)
47 
48 
49 /****************************************************************************
50  ****************************************************************************/
51 unsigned
preprocess_frame(const unsigned char * px,unsigned length,unsigned link_type,struct PreprocessedInfo * info)52 preprocess_frame(const unsigned char *px, unsigned length, unsigned link_type,
53                  struct PreprocessedInfo *info)
54 {
55     unsigned offset = 0;
56     unsigned ethertype = 0;
57 
58     info->transport_offset = 0;
59     info->found = FOUND_NOTHING;
60     info->found_offset = 0;
61 
62     /* If not standard Ethernet, go do something else */
63     if (link_type != 1)
64         goto parse_linktype;
65 
66 parse_ethernet:
67     VERIFY_REMAINING(14, FOUND_ETHERNET);
68 
69     info->mac_dst = px+offset+0;
70     info->mac_src = px+offset+6;
71     ethertype = ex16be(px+offset+12);
72     offset += 14;
73     if (ethertype < 2000)
74         goto parse_llc;
75     if (ethertype != 0x0800)
76         goto parse_ethertype;
77 
78 parse_ipv4:
79     {
80         unsigned header_length;
81         unsigned flags;
82         unsigned fragment_offset;
83         unsigned total_length;
84 
85         info->ip_offset = offset;
86         VERIFY_REMAINING(20, FOUND_IPV4);
87 
88         /* Check version */
89         if ((px[offset]>>4) != 4)
90             return 0; /* not IPv4 or corrupt */
91 
92         /* Check header length */
93         header_length = (px[offset] & 0x0F) * 4;
94         VERIFY_REMAINING(header_length, FOUND_IPV4);
95 
96         /*TODO: verify checksum */
97 
98         /* Check for fragmentation */
99         flags = px[offset+6]&0xE0;
100         fragment_offset = (ex16be(px+offset+6) & 0x3FFF) << 3;
101         if (fragment_offset != 0 || (flags & 0x20))
102             return 0; /* fragmented */
103 
104         /* Check for total-length */
105         total_length = ex16be(px+offset+2);
106         VERIFY_REMAINING(total_length, FOUND_IPV4);
107         if (total_length < header_length)
108             return 0; /* weird corruption */
109         length = offset + total_length; /* reduce the max length */
110 
111 
112         /* Save off pseudo header for checksum calculation */
113         info->ip_version = (px[offset]>>4)&0xF;
114         info->_ip_src = px+offset+12;
115         info->_ip_dst = px+offset+16;
116         info->src_ip.ipv4 = px[offset+12] << 24
117                             | px[offset+13] << 16
118                             | px[offset+14] << 8
119                             | px[offset+15] << 0;
120         info->src_ip.version = 4;
121         info->dst_ip.ipv4 = px[offset+16] << 24
122                             | px[offset+17] << 16
123                             | px[offset+18] << 8
124                             | px[offset+19] << 0;
125         info->dst_ip.version = 4;
126 
127         info->ip_ttl = px[offset+8];
128         info->ip_protocol = px[offset+9];
129         info->ip_length = total_length;
130         if (info->ip_version != 4)
131             return 0;
132 
133         /* next protocol */
134         offset += header_length;
135         info->transport_offset = offset;
136         info->transport_length = length - info->transport_offset;
137 
138         switch (info->ip_protocol) {
139         case   1: goto parse_icmp;
140         case   2: goto parse_igmp;
141         case   6: goto parse_tcp;
142         case  17: goto parse_udp;
143         case 132: goto parse_sctp;
144         default:
145                 VERIFY_REMAINING(0, FOUND_OPROTO);
146                 return 0; /* todo: should add more protocols, like ICMP */
147         }
148     }
149 
150 parse_tcp:
151     {
152         unsigned tcp_length;
153         VERIFY_REMAINING(20, FOUND_TCP);
154         tcp_length = px[offset + 12]>>2;
155         VERIFY_REMAINING(tcp_length, FOUND_TCP);
156         info->port_src = ex16be(px+offset+0);
157         info->port_dst = ex16be(px+offset+2);
158         info->app_offset = offset + tcp_length;
159         info->app_length = length - info->app_offset;
160         //assert(info->app_length < 2000);
161 
162         return 1;
163     }
164 
165 parse_udp:
166     {
167         VERIFY_REMAINING(8, FOUND_UDP);
168 
169         info->port_src = ex16be(px+offset+0);
170         info->port_dst = ex16be(px+offset+2);
171         offset += 8;
172         info->app_offset = offset;
173         info->app_length = length - info->app_offset;
174         assert(info->app_length < 2000);
175 
176         if (info->port_dst == 53 || info->port_src == 53) {
177             goto parse_dns;
178         }
179         return 1;
180     }
181 
182 parse_icmp:
183     {
184         VERIFY_REMAINING(4, FOUND_ICMP);
185         info->port_src = px[offset+0];
186         info->port_dst = px[offset+1];
187         return 1;
188     }
189 
190 parse_igmp:
191     {
192         VERIFY_REMAINING(4, FOUND_IGMP);
193         info->port_src = 0;
194         info->port_dst = px[offset+0];
195         return 1;
196     }
197 
198 parse_sctp:
199     {
200         VERIFY_REMAINING(12, FOUND_SCTP);
201         info->port_src = ex16be(px+offset+0);
202         info->port_dst = ex16be(px+offset+2);
203         info->app_offset = offset + 12;
204         info->app_length = length - info->app_offset;
205         assert(info->app_length < 2000);
206         return 1;
207     }
208 
209 parse_dns:
210     {
211         VERIFY_REMAINING(8, FOUND_DNS);
212         return 1;
213     }
214 
215 parse_ipv6:
216     {
217         unsigned payload_length;
218 
219         info->ip_offset = offset;
220         VERIFY_REMAINING(40, FOUND_IPV6);
221 
222         /* Check version */
223         if ((px[offset]>>4) != 6)
224             return 0; /* not IPv4 or corrupt */
225 
226         /* Payload length */
227         payload_length = ex16be(px+offset+4);
228         VERIFY_REMAINING(40+payload_length, FOUND_IPV6);
229         if (length > offset + 40 + payload_length)
230             length = offset + 40 + payload_length;
231 
232         /* Save off pseudo header for checksum calculation */
233         info->ip_version = (px[offset]>>4)&0xF;
234         info->_ip_src = px+offset+8;
235         info->_ip_dst = px+offset+8+16;
236         info->ip_protocol = px[offset+6];
237 
238         info->src_ip.version = 6;
239         info->src_ip.ipv6.hi = 0ULL
240                             | (uint64_t)px[offset +  8] << 56ULL
241                             | (uint64_t)px[offset +  9] << 48ULL
242                             | (uint64_t)px[offset + 10] << 40ULL
243                             | (uint64_t)px[offset + 11] << 32ULL
244                             | (uint64_t)px[offset + 12] << 24ULL
245                             | (uint64_t)px[offset + 13] << 16ULL
246                             | (uint64_t)px[offset + 14] <<  8ULL
247                             | (uint64_t)px[offset + 15] <<  0ULL;
248         info->src_ip.ipv6.lo = 0ULL
249                             | (uint64_t)px[offset + 16] << 56ULL
250                             | (uint64_t)px[offset + 17] << 48ULL
251                             | (uint64_t)px[offset + 18] << 40ULL
252                             | (uint64_t)px[offset + 19] << 32ULL
253                             | (uint64_t)px[offset + 20] << 24ULL
254                             | (uint64_t)px[offset + 21] << 16ULL
255                             | (uint64_t)px[offset + 22] <<  8ULL
256                             | (uint64_t)px[offset + 23] <<  0ULL;
257 
258         info->dst_ip.version = 6;
259         info->dst_ip.ipv6.hi = 0ULL
260                             | (uint64_t)px[offset + 24] << 56ULL
261                             | (uint64_t)px[offset + 25] << 48ULL
262                             | (uint64_t)px[offset + 26] << 40ULL
263                             | (uint64_t)px[offset + 27] << 32ULL
264                             | (uint64_t)px[offset + 28] << 24ULL
265                             | (uint64_t)px[offset + 29] << 16ULL
266                             | (uint64_t)px[offset + 30] <<  8ULL
267                             | (uint64_t)px[offset + 31] <<  0ULL;
268         info->dst_ip.ipv6.lo = 0ULL
269                             | (uint64_t)px[offset + 32] << 56ULL
270                             | (uint64_t)px[offset + 33] << 48ULL
271                             | (uint64_t)px[offset + 34] << 40ULL
272                             | (uint64_t)px[offset + 35] << 32ULL
273                             | (uint64_t)px[offset + 36] << 24ULL
274                             | (uint64_t)px[offset + 37] << 16ULL
275                             | (uint64_t)px[offset + 38] <<  8ULL
276                             | (uint64_t)px[offset + 39] <<  0ULL;
277 
278 
279 
280         /* next protocol */
281         offset += 40;
282         info->transport_offset = offset;
283         info->transport_length = length - info->transport_offset;
284 
285 parse_ipv6_next:
286         switch (info->ip_protocol) {
287         case 0: goto parse_ipv6_hop_by_hop;
288         case 6: goto parse_tcp;
289         case 17: goto parse_udp;
290         case 58: goto parse_icmpv6;
291         case 132: goto parse_sctp;
292         case 0x2c: /* IPv6 fragmetn */
293             return 0;
294         default:
295             //printf("***** test me ******\n");
296             return 0; /* todo: should add more protocols, like ICMP */
297         }
298     }
299 
300 parse_ipv6_hop_by_hop:
301     {
302         unsigned len;
303 
304         VERIFY_REMAINING(8, FOUND_IPV6_HOP);
305         info->ip_protocol = px[offset];
306         len = px[offset+1] + 8;
307 
308         VERIFY_REMAINING(len, FOUND_IPV6_HOP);
309         offset += len;
310         info->transport_offset = offset;
311         info->transport_length = length - info->transport_offset;
312     }
313     goto parse_ipv6_next;
314 
315 parse_icmpv6:
316     {
317         unsigned icmp_type;
318         unsigned icmp_code;
319 
320         VERIFY_REMAINING(4, FOUND_ICMP);
321 
322         icmp_type = px[offset+0];
323         icmp_code = px[offset+1];
324 
325         info->port_src = icmp_type;
326         info->port_dst = icmp_code;
327 
328         if (133 <= icmp_type && icmp_type <= 136) {
329             info->found = FOUND_NDPv6;
330         }
331     }
332     return 1;
333 
334 parse_vlan8021q:
335     VERIFY_REMAINING(4, FOUND_8021Q);
336     ethertype = ex16be(px+offset+2);
337     offset += 4;
338     goto parse_ethertype;
339 
340 parse_vlanmpls:
341     /* MULTILEVEL:
342      * Regress: wireshark/mpls-twolevel.cap(9)
343      * There can be multiple layers of MPLS tags. This is marked by a
344      * flag in the header whether the current header is the "final"
345      * header in the stack*/
346     while (offset + 4 < length && !(px[offset+2] & 1))
347         offset += 4;
348 
349     VERIFY_REMAINING(4, FOUND_MPLS);
350     offset += 4;
351 
352     if (px[offset-4+2]&1) {
353         goto parse_ipv4;
354     } else
355         return 0;
356 
357 
358 
359 wifi_data:
360     {
361         unsigned flag;
362         VERIFY_REMAINING(24, FOUND_WIFI_DATA);
363 
364         flag = px[offset];
365 
366         switch (px[offset+1]&0x03) {
367         case 0:
368         case 2:
369             info->mac_dst = px+offset+4;
370             info->mac_bss = px+offset+10;
371             info->mac_src = px+offset+16;
372             break;
373         case 1:
374             info->mac_bss = px+offset+4;
375             info->mac_src = px+offset+10;
376             info->mac_dst = px+offset+16;
377             break;
378         case 3:
379             info->mac_bss = (const unsigned char*)"\0\0\0\0\0\0";
380             info->mac_dst = px+offset+16;
381             info->mac_src = px+offset+24;
382             offset += 6;
383             break;
384         }
385 
386 
387         if ((px[offset+1]&0x04) != 0 || (px[offset+22]&0xF) != 0)
388             return 0;
389 
390         offset += 24;
391         if (flag == 0x88) {
392             offset += 2;
393         }
394 
395         goto parse_llc;
396     }
397 
398 parse_wifi:
399     VERIFY_REMAINING(2, FOUND_WIFI);
400     switch (px[offset]) {
401     case 0x08:
402     case 0x88: /* QoS data */
403         if (px[1] & 0x40)
404             return 0;
405         goto wifi_data;
406         break;
407     default:
408         return 0;
409     }
410 
411 parse_radiotap_header:
412     /* Radiotap headers for WiFi. http://www.radiotap.org/
413      *
414      *   struct ieee80211_radiotap_header {
415      *           u_int8_t        it_version;     // set to 0
416      *           u_int8_t        it_pad;
417      *           u_int16_t       it_len;         // entire length
418      *           u_int32_t       it_present;     // fields present
419      *   };
420      */
421     {
422         unsigned header_length;
423         unsigned features;
424 
425         VERIFY_REMAINING(8, FOUND_RADIOTAP);
426         if (px[offset] != 0)
427             return 0;
428         header_length = ex16le(px+offset+2);
429         features = ex32le(px+offset+4);
430 
431         VERIFY_REMAINING(header_length, FOUND_RADIOTAP);
432 
433         /* If FCS is present at the end of the packet, then change
434          * the length to remove it */
435         if (features & 0x4000) {
436             unsigned fcs_header = ex32le(px+offset+header_length-4);
437             unsigned fcs_frame = ex32le(px+length-4);
438             if (fcs_header == fcs_frame)
439                 length -= 4;
440             VERIFY_REMAINING(header_length, FOUND_RADIOTAP);
441         }
442         offset += header_length;
443         goto parse_wifi;
444     }
445 
446 
447 parse_prism_header:
448     /* DLT_PRISM_HEADER */
449     /* This was original created to handle Prism II cards, but now we see this
450      * from other cards as well, such as the 'madwifi' drivers using Atheros
451      * chipsets.
452      *
453      * This starts with a "TLV" format, a 4-byte little-endian tag, followed by
454      * a 4-byte little-endian length. This TLV should contain the entire Prism
455      * header, after which we'll find the real header. Therefore, we should just
456      * be able to parse the 'length', and skip that many bytes. I'm told it's more
457      * complicated than that, but it seems to work right now, so I'm keeping it
458      * this way.
459      */
460     {
461         unsigned header_length;
462         VERIFY_REMAINING(8, FOUND_PRISM);
463 
464         if (ex32le(px+offset+0) != 0x00000044)
465             return 0;
466         header_length = ex32le(px+offset+4);
467         if (header_length > 0xFFFFF)
468             return 0;
469         VERIFY_REMAINING(header_length, FOUND_PRISM);
470         offset += header_length;
471         goto parse_wifi;
472     }
473 
474 parse_llc:
475     {
476         unsigned oui;
477 
478         VERIFY_REMAINING(3, FOUND_LLC);
479 
480         switch (ex24be(px+offset)) {
481         case 0x0000aa: offset += 2; goto parse_llc;
482         default: return 0;
483         case 0xaaaa03: break;
484         }
485 
486         offset +=3 ;
487 
488         VERIFY_REMAINING(5, FOUND_LLC);
489 
490         oui = ex24be(px+offset);
491         ethertype = ex16be(px+offset+3);
492         offset += 5;
493 
494         switch (oui){
495         case 0x000000: goto parse_ethertype;
496         default: return 0;
497         }
498 
499     }
500 
501 parse_ethertype:
502     switch (ethertype) {
503     case 0x0800: goto parse_ipv4;
504     case 0x0806: goto parse_arp;
505     case 0x86dd: goto parse_ipv6;
506     case 0x8100: goto parse_vlan8021q;
507     case 0x8847: goto parse_vlanmpls;
508     default: return 0;
509     }
510 
511 parse_linktype:
512     /*
513      * The "link-type" is the same as specified in "libpcap" headers
514      */
515     switch (link_type) {
516         case 0:
517             offset += 4;
518             switch (ex32be(px)) {
519                 case 0x02000000:
520                 case 0x00000002:
521                     goto parse_ipv4;
522                 /* Depending on operating system, these can have
523                  different values: 24, 28, or 30 */
524                 case 0x18000000:
525                 case 0x00000018:
526                 case 0x1c000000:
527                 case 0x0000001c:
528                 case 0x1e000000:
529                 case 0x0000001e:
530                     goto parse_ipv6;
531             }
532             return 0;
533         case 1:     goto parse_ethernet;
534         case 12:
535             switch (px[offset]>>4) {
536 		case 4: goto parse_ipv4;
537                 case 6: goto parse_ipv6;
538             }
539             return 0;
540         case 0x69:  goto parse_wifi;
541         case 113:   goto parse_linux_sll; /* LINKTYPE_LINUX_SLL DLT_LINUX_SLL */
542         case 119:   goto parse_prism_header;
543         case 127:   goto parse_radiotap_header;
544         default:    return 0;
545     }
546 
547 parse_linux_sll:
548     /*
549      +--------+--------+
550      |    packet type  |
551      +--------+--------+
552      |   ARPHRD_ type  |
553      +--------+--------+
554      |   addr length   |
555      +--------+--------+
556      |                 |
557      +  first 8 bytes  +
558      |     of the      |
559      +  hardware/MAC   +
560      |     address     |
561      +                 +
562      |                 |
563      +--------+--------+
564      |     ethertype   |
565      +--------+--------+
566      */
567     {
568         struct {
569             unsigned packet_type;
570             unsigned arp_type;
571             unsigned addr_length;
572             unsigned char mac_address[8];
573             unsigned ethertype;
574         } sll;
575 
576         VERIFY_REMAINING(16, FOUND_SLL);
577 
578         sll.packet_type = ex16be(px+offset+0);
579         sll.arp_type = ex16be(px+offset+2);
580         sll.addr_length = ex16be(px+offset+4);
581         memcpy(sll.mac_address, px+offset+6, 8);
582         sll.ethertype = ex16be(px+offset+14);
583 
584         offset += 16;
585 
586         goto parse_ethertype;
587     }
588 
589 parse_arp:
590     info->ip_version = 256;
591     info->ip_offset = offset;
592     {
593         //unsigned hardware_type;
594         //unsigned protocol_type;
595         unsigned hardware_length;
596         unsigned protocol_length;
597         unsigned opcode;
598 
599         VERIFY_REMAINING(8, FOUND_ARP);
600         //hardware_type = px[offset]<<8 | px[offset+1];
601         //protocol_type = px[offset+2]<<8 | px[offset+3];
602         hardware_length = px[offset+4];
603         protocol_length = px[offset+5];
604         opcode = px[offset+6]<<8 | px[offset+7];
605         info->opcode = opcode;
606         info->ip_protocol = opcode;
607         offset += 8;
608 
609         VERIFY_REMAINING(2*hardware_length + 2*protocol_length, FOUND_ARP);
610 
611         info->_ip_src = px + offset + hardware_length;
612         info->_ip_dst = px + offset + 2*hardware_length + protocol_length;
613 
614         info->src_ip.version = 4;
615         info->src_ip.ipv4 = px[offset + hardware_length + 0] << 24
616                             | px[offset + hardware_length + 1] << 16
617                             | px[offset + hardware_length + 2] << 8
618                             | px[offset + hardware_length + 3] << 0;
619         info->dst_ip.version = 4;
620         info->dst_ip.ipv4 = px[offset + 2*hardware_length + protocol_length + 0] << 24
621                             | px[offset + 2*hardware_length + protocol_length + 1] << 16
622                             | px[offset + 2*hardware_length + protocol_length + 2] << 8
623                             | px[offset + 2*hardware_length + protocol_length + 3] << 0;
624 
625         info->found_offset = info->ip_offset;
626         return 1;
627     }
628 
629 }
630