1 /*
2  * $Id$
3  *
4  * Copyright (C) 2010 iptelorg GmbH
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 /** raw socket functions.
19  *  @file raw_sock.c
20  *  @ingroup core
21  *  Module: @ref core
22  */
23 /*
24  * History:
25  * --------
26  *  2010-06-07  initial version (from older code) andrei
27  *  2010-06-15  IP_HDRINCL raw socket support, including on-send
28  *               fragmentation (andrei)
29  */
30 
31 #include "ip_util.h"
32 #include "log.h"
33 
34 #include <errno.h>
35 #include <string.h>
36 #include <unistd.h>
37 #include <sys/types.h>
38 #include <fcntl.h>
39 #include <sys/socket.h>
40 #include <netinet/in.h>
41 #include <netinet/in_systm.h>
42 #include <arpa/inet.h>
43 #ifndef __USE_BSD
44 #define __USE_BSD  /* on linux use bsd version of iphdr (more portable) */
45 #endif /* __USE_BSD */
46 #include <netinet/ip.h>
47 #define __FAVOR_BSD /* on linux use bsd version of udphdr (more portable) */
48 #include <netinet/udp.h>
49 #include <netdb.h>
50 #ifdef __DragonFly__
51 #include <err.h>
52 #endif
53 
54 #include "raw_sock.h"
55 
56 /* likely/unlikely */
57 #if __GNUC__ >= 3
58 
59 #define likely(expr)              __builtin_expect(!!(expr), 1)
60 #define unlikely(expr)            __builtin_expect(!!(expr), 0)
61 
62 #else /* __GNUC__ */
63 
64 /* #warning "No compiler optimizations supported try gcc 4.x" */
65 #define likely(expr) (expr)
66 #define unlikely(expr) (expr)
67 
68 #endif /* __GNUC__ */
69 
70 
71 // macros for converting values in the expected format
72 // #if OS == "freebsd" || OS == "netbsd" || OS == "darwin"
73 /* on freebsd and netbsd the ip offset (along with flags) and the
74    ip header length must be filled in _host_ bytes order format.
75    The same is true for openbsd < 2.1.
76 */
77 #if defined(RAW_IPHDR_IP_HBO)
78 
79 /** convert the ip offset in the format expected by the kernel. */
80 #define RAW_IPHDR_IP_OFF(off) (unsigned short)(off)
81 /** convert the ip total length in the format expected by the kernel. */
82 #define RAW_IPHDR_IP_LEN(tlen) (unsigned short)(tlen)
83 
84 #else /* __OS_* */
85 /* linux, openbsd >= 2.1 a.s.o. */
86 /** convert the ip offset in the format expected by the kernel. */
87 #define RAW_IPHDR_IP_OFF(off)  htons((unsigned short)(off))
88 /** convert the ip total length in the format expected by the kernel. */
89 #define RAW_IPHDR_IP_LEN(tlen) htons((unsigned short)(tlen))
90 
91 #endif /* __OS_* */
92 
93 /** create and return a raw socket.
94  * @param proto - protocol used (e.g. IPPROTO_UDP, IPPROTO_RAW)
95  * @param ip - if not null the socket will be bound on this ip.
96  * @param iphdr_incl - set to 1 if packets send on this socket include
97  *                     a pre-built ip header (some fields, like the checksum
98  *                     will still be filled by the kernel, OTOH packet
99  *                     fragmentation has to be done in user space).
100  * @return socket on success, -1 on error
101  */
raw_socket(int proto,sockaddr_storage * ip,int iphdr_incl)102 int raw_socket(int proto, sockaddr_storage* ip, int iphdr_incl)
103 {
104 	int sock;
105 	int t;
106 
107 	sock = socket(PF_INET, SOCK_RAW, proto);
108 	if (sock==-1)
109 	  goto error;
110 	/* set socket options */
111 	if (iphdr_incl) {
112 	  t=1;
113 	  if (setsockopt(sock, IPPROTO_IP, IP_HDRINCL, &t, sizeof(t))<0){
114 	    ERROR("raw_socket: setsockopt(IP_HDRINCL) failed: %s [%d]\n",
115 		  strerror(errno), errno);
116 	    goto error;
117 	  }
118 	} else {
119 	  /* IP_PKTINFO makes no sense if the ip header is included */
120 	  /* using IP_PKTINFO */
121 	  t=1;
122 #ifdef IP_PKTINFO
123 	  if (setsockopt(sock, IPPROTO_IP, IP_PKTINFO, &t, sizeof(t))<0){
124 	    ERROR("raw_socket: setsockopt(IP_PKTINFO) failed: %s [%d]\n",
125 		  strerror(errno), errno);
126 	    goto error;
127 	  }
128 #elif defined(IP_RECVDSTADDR)
129 	  if (setsockopt(sock, IPPROTO_IP, IP_RECVDSTADDR, &t, sizeof(t))<0){
130 	    ERROR("raw_socket: setsockop(IP_RECVDSTADDR) failed: %s [%d]\n",
131 		  strerror(errno), errno);
132 	    goto error;
133 	  }
134 #else
135 #error "no method of getting the destination ip address supported"
136 #endif /* IP_RECVDSTADDR / IP_PKTINFO */
137 	}
138 #if defined (IP_MTU_DISCOVER) && defined (IP_PMTUDISC_DONT)
139 	t=IP_PMTUDISC_DONT;
140 	if(setsockopt(sock, IPPROTO_IP, IP_MTU_DISCOVER, &t, sizeof(t)) ==-1){
141 	  ERROR("raw_socket: setsockopt(IP_MTU_DISCOVER): %s\n", strerror(errno));
142 	  goto error;
143 	}
144 #endif /* IP_MTU_DISCOVER && IP_PMTUDISC_DONT */
145 	/* FIXME: probe_max_receive_buffer(sock) missing */
146 	if (ip){
147 	  if (bind(sock, (sockaddr*)ip, SA_len(ip))==-1){
148 	    char ip_str[NI_MAXHOST];
149 	    ERROR("raw_socket: bind(%s) failed: %s [%d]\n",
150 		  am_inet_ntop(ip,ip_str,NI_MAXHOST), strerror(errno), errno);
151 	    goto error;
152 	  }
153 	}
154 	return sock;
155 error:
156 	if (sock!=-1) close(sock);
157 	return -1;
158 }
159 
160 
161 
162 /** create and return an udp over ipv4  raw socket.
163  * @param iphdr_incl - set to 1 if packets send on this socket include
164  *                     a pre-built ip header (some fields, like the checksum
165  *                     will still be filled by the kernel, OTOH packet
166  *                     fragmentation has to be done in user space).
167  * @return socket on success, -1 on error
168  */
raw_udp_socket(int iphdr_incl)169 int raw_udp_socket(int iphdr_incl)
170 {
171   return raw_socket(IPPROTO_UDP, NULL, iphdr_incl);
172 }
173 
174 
175 
176 /** receives an ipv4 packet using a raw socket.
177  * An ipv4 packet is received in buf, using IP_PKTINFO or IP_RECVDSTADDR.
178  * from and to are filled (only the ip part the ports are 0 since this
179  * function doesn't try to look beyond the IP level).
180  * @param sock - raw socket
181  * @param buf - detination buffer.
182  * @param len - buffer len (should be enough for receiving a packet +
183  *               IP header).
184  * @param from - result parameter, the IP address part of it will be filled
185  *                with the source address and the port with 0.
186  * @param to - result parameter, the IP address part of it will be filled
187  *                with the destination (local) address and the port with 0.
188  * @return packet len or <0 on error: -1 (check errno),
189  *        -2 no IP_PKTINFO/IP_RECVDSTADDR found or AF mismatch
190  */
191 // int recvpkt4(int sock, char* buf, int len,
192 // 	     sockaddr_storage* from, sockaddr_storage* to)
193 // {
194 // 	struct iovec iov[1];
195 // 	struct msghdr rcv_msg;
196 // 	struct cmsghdr* cmsg;
197 // #ifdef IP_PKTINFO
198 // 	struct in_pktinfo* rcv_pktinfo;
199 // #endif /* IP_PKTINFO */
200 // 	int n, ret;
201 // 	char msg_ctrl_buf[1024];
202 
203 // 	iov[0].iov_base=buf;
204 // 	iov[0].iov_len=len;
205 // 	rcv_msg.msg_name=from;
206 // 	rcv_msg.msg_namelen=sizeof(sockaddr_storage);
207 // 	rcv_msg.msg_control=msg_ctrl_buf;
208 // 	rcv_msg.msg_controllen=sizeof(msg_ctrl_buf);
209 // 	rcv_msg.msg_iov=&iov[0];
210 // 	rcv_msg.msg_iovlen=1;
211 // 	ret=-2; /* no PKT_INFO or AF mismatch */
212 // retry:
213 // 	n=recvmsg(sock, &rcv_msg, MSG_WAITALL);
214 // 	if (unlikely(n==-1)){
215 // 	  if (errno==EINTR)
216 // 	    goto retry;
217 // 	  ret=n;
218 // 	  goto end;
219 // 	}
220 // 	/* find the pkt info */
221 // 	for (cmsg=CMSG_FIRSTHDR(&rcv_msg); cmsg;
222 // 	     cmsg=CMSG_NXTHDR(&rcv_msg, cmsg)){
223 // #ifdef IP_PKTINFO
224 // 	  if ((cmsg->cmsg_level==IPPROTO_IP) &&
225 // 	      (cmsg->cmsg_type==IP_PKTINFO)) {
226 // 	    rcv_pktinfo=(struct in_pktinfo*)CMSG_DATA(cmsg);
227 // 	    to->ss_family=AF_INET;
228 // 	    memcpy(&SAv4(to)->sin_addr, &rcv_pktinfo->ipi_spec_dst.s_addr
229 // 		   sizeof(in_addr));
230 // 	    am_set_port(to,0); /* not known */
231 // 	    /* interface no. in ipi_ifindex */
232 // 	    ret=n; /* success */
233 // 	    break;
234 // 	  }
235 // #elif defined (IP_RECVDSTADDR)
236 // 	  if (likely((cmsg->cmsg_level==IPPROTO_IP) &&
237 // 		     (cmsg->cmsg_type==IP_RECVDSTADDR))) {
238 // 	    to->ss_family=AF_INET;
239 // 	    memcpy(&SAv4(to)->sin_addr, CMSG_DATA(cmsg),
240 // 		   sizeof(in_addr));
241 // 	    am_set_port(to,0); /* not known */
242 // 	    ret=n; /* success */
243 // 	    break;
244 // 	  }
245 // #else
246 // #error "no method of getting the destination ip address supported"
247 // #endif /* IP_PKTINFO / IP_RECVDSTADDR */
248 // 	}
249 //  end:
250 // 	return ret;
251 // }
252 
253 
254 
255 /* receive an ipv4 udp packet over a raw socket.
256  * The packet is copied in *buf and *buf is advanced to point to the
257  * payload.  Fills from and to.
258  * @param rsock - raw socket
259  * @param buf - the packet will be written to where *buf points intially and
260  *              then *buf will be advanced to point to the udp payload.
261  * @param len - buffer length (should be enough to hold at least the
262  *               ip and udp headers + 1 byte).
263  * @param from - result parameter, filled with source address and port of the
264  *               packet.
265  * @param from - result parameter, filled with destination (local) address and
266  *               port of the packet.
267  * @param rf   - filter used to decide whether or not the packet is
268  *                accepted/processed. If null, all the packets are accepted.
269  * @return packet len or  <0 on error (-1 and -2 on recv error @see recvpkt4,
270  *         -3 if the headers are invalid and -4 if the packet doesn't
271  *         match the  filter).
272  */
273 // int raw_udp4_recv(int rsock, char** buf, int len,
274 // 		  sockaddr_storage* from, sockaddr_storage* to)
275 // {
276 // 	int n;
277 // 	unsigned short dst_port;
278 // 	unsigned short src_port;
279 // 	//struct ip_addr dst_ip;
280 // 	char* end;
281 // 	char* udph_start;
282 // 	char* udp_payload;
283 // 	struct ip iph;
284 // 	struct udphdr udph;
285 // 	unsigned short udp_len;
286 
287 // 	n=recvpkt4(rsock, *buf, len, from, to);
288 // 	if (unlikely(n<0)) goto error;
289 
290 // 	end=*buf+n;
291 // 	if (unlikely(n<(sizeof(struct ip)+sizeof(struct udphdr)))) {
292 // 		n=-3;
293 // 		goto error;
294 // 	}
295 
296 // 	/* FIXME: if initial buffer is aligned, one could skip the memcpy
297 // 	   and directly cast ip and udphdr pointer to the memory */
298 // 	memcpy(&iph, *buf, sizeof(struct ip));
299 // 	udph_start=*buf+iph.ip_hl*4;
300 // 	udp_payload=udph_start+sizeof(struct udphdr);
301 // 	if (unlikely(udp_payload>end)){
302 // 		n=-3;
303 // 		goto error;
304 // 	}
305 // 	memcpy(&udph, udph_start, sizeof(struct udphdr));
306 // 	udp_len=ntohs(udph.uh_ulen);
307 // 	if (unlikely((udph_start+udp_len)!=end)){
308 // 		if ((udph_start+udp_len)>end){
309 // 			n=-3;
310 // 			goto error;
311 // 		}else{
312 // 			ERROR("udp length too small: %d/%d\n",
313 // 			      (int)udp_len, (int)(end-udph_start));
314 // 			n=-3;
315 // 			goto error;
316 // 		}
317 // 	}
318 // 	/* advance buf */
319 // 	*buf=udp_payload;
320 // 	n=(int)(end-*buf);
321 // 	/* fill ip from the packet (needed if no PKT_INFO is used) */
322 // 	dst_ip.af=AF_INET;
323 // 	dst_ip.len=4;
324 // 	dst_ip.u.addr32[0]=iph.ip_dst.s_addr;
325 // 	/* fill dst_port */
326 // 	dst_port=ntohs(udph.uh_dport);
327 // 	ip_addr2su(to, &dst_ip, dst_port);
328 // 	/* fill src_port */
329 // 	src_port=ntohs(udph.uh_sport);
330 // 	su_setport(from, src_port);
331 // 	// if (likely(rf)) {
332 // 	// 	su2ip_addr(&dst_ip, to);
333 // 	// 	if ( (dst_port && rf->port1 && ((dst_port<rf->port1) ||
334 // 	// 					(dst_port>rf->port2)) ) ||
335 // 	// 		(matchnet(&dst_ip, &rf->dst)!=1) ){
336 // 	// 		/* no match */
337 // 	// 		n=-4;
338 // 	// 		goto error;
339 // 	// 	}
340 // 	// }
341 // error:
342 // 	return n;
343 // }
344 
345 
346 
347 /** udp checksum helper: compute the pseudo-header 16-bit "sum".
348  * Computes the partial checksum (no complement) of the pseudo-header.
349  * It is meant to be used by udpv4_chksum().
350  * @param uh - filled udp header
351  * @param src - source ip address in network byte order.
352  * @param dst - destination ip address in network byte order.
353  * @param length - payload length (not including the udp header),
354  *                 in _host_ order.
355  * @return the partial checksum in host order
356  */
udpv4_vhdr_sum(struct udphdr * uh,struct in_addr * src,struct in_addr * dst,unsigned short length)357 inline unsigned short udpv4_vhdr_sum(struct udphdr* uh,
358 				     struct in_addr* src,
359 				     struct in_addr* dst,
360 				     unsigned short length)
361 {
362 	unsigned sum;
363 
364 	/* pseudo header */
365 	sum=(src->s_addr>>16)+(src->s_addr&0xffff)+
366 		(dst->s_addr>>16)+(dst->s_addr&0xffff)+
367 		htons(IPPROTO_UDP)+(uh->uh_ulen);
368 	/* udp header */
369 	sum+=(uh->uh_dport)+(uh->uh_sport)+(uh->uh_ulen) + 0 /*chksum*/;
370 	/* fold it */
371 	sum=(sum>>16)+(sum&0xffff);
372 	sum+=(sum>>16);
373 	/* no complement */
374 	return ntohs((unsigned short) sum);
375 }
376 
377 
378 
379 /** compute the udp over ipv4 checksum.
380  * @param u - filled udp header (except checksum).
381  * @param src - source ip v4 address, in _network_ byte order.
382  * @param dst - destination ip v4 address, int _network_ byte order.
383  * @param data - pointer to the udp payload.
384  * @param length - payload length, not including the udp header and in
385  *                 _host_ order. The length mist be <= 0xffff - 8
386  *                 (to allow space for the udp header).
387  * @return the checksum in _host_ order */
udpv4_chksum(struct udphdr * u,struct in_addr * src,struct in_addr * dst,unsigned char * data,unsigned short length)388 inline static unsigned short udpv4_chksum(struct udphdr* u,
389 					  struct in_addr* src,
390 					  struct in_addr* dst,
391 					  unsigned char* data,
392 					  unsigned short length)
393 {
394 	unsigned sum;
395 	unsigned char* end;
396 	sum=udpv4_vhdr_sum(u, src, dst, length);
397 	end=data+(length&(~0x1)); /* make sure it's even */
398 	/* TODO: 16 & 32 bit aligned version */
399 		/* not aligned */
400 		for(;data<end;data+=2){
401 			sum+=((data[0]<<8)+data[1]);
402 		}
403 		if (length&0x1)
404 			sum+=((*data)<<8);
405 
406 	/* fold it */
407 	sum=(sum>>16)+(sum&0xffff);
408 	sum+=(sum>>16);
409 	return (unsigned short)~sum;
410 }
411 
412 
413 
414 /** fill in an udp header.
415  * @param u - udp header that will be filled.
416  * @param from - source ip v4 address and port.
417  * @param to -   destination ip v4 address and port.
418  * @param buf - pointer to the payload.
419  * @param len - payload length (not including the udp header).
420  * @param do_chk - if set the udp checksum will be computed, else it will
421  *                 be set to 0.
422  * @return 0 on success, < 0 on error.
423  */
mk_udp_hdr(struct udphdr * u,const sockaddr_storage * from,const sockaddr_storage * to,unsigned char * buf,int len,int do_chk)424 inline static int mk_udp_hdr(struct udphdr* u,
425 			     const sockaddr_storage* from,
426 			     const sockaddr_storage* to,
427 			     unsigned char* buf, int len,
428 			     int do_chk)
429 {
430   struct sockaddr_in *from_v4 = (sockaddr_in*)from;
431   struct sockaddr_in *to_v4 = (sockaddr_in*)to;
432 
433   u->uh_ulen = htons((unsigned short)len+sizeof(struct udphdr));
434   u->uh_sport = ((sockaddr_in*)from)->sin_port;
435   u->uh_dport = ((sockaddr_in*)to)->sin_port;
436   if (do_chk)
437     u->uh_sum=htons(udpv4_chksum(u, &from_v4->sin_addr,
438 				 &to_v4->sin_addr, buf, len));
439   else
440     u->uh_sum=0; /* no checksum */
441   return 0;
442 }
443 
444 
445 
446 /** fill in an ip header.
447  * Note: the checksum is _not_ computed.
448  * WARNING: The ip header length and offset might be filled in
449  * _host_ byte order or network byte order (depending on the OS, for example
450  *  freebsd needs host byte order for raw sockets with IPHDR_INC, while
451  *  linux needs network byte order).
452  * @param iph - ip header that will be filled.
453  * @param from - source ip v4 address (network byte order).
454  * @param to -   destination ip v4 address (network byte order).
455  * @param payload len - payload length (not including the ip header).
456  * @param proto - protocol.
457  * @return 0 on success, < 0 on error.
458  */
mk_ip_hdr(struct ip * iph,struct in_addr * from,struct in_addr * to,int payload_len,unsigned char proto)459 inline static int mk_ip_hdr(struct ip* iph, struct in_addr* from,
460 			    struct in_addr* to, int payload_len,
461 			    unsigned char proto)
462 {
463 	iph->ip_hl = sizeof(struct ip)/4;
464 	iph->ip_v = 4;
465 	iph->ip_tos = 0;
466 	/* on freebsd ip_len _must_ be in _host_ byte order instead
467 	   of network byte order. On linux the length is ignored (it's filled
468 	   automatically every time). */
469 	iph->ip_len = RAW_IPHDR_IP_LEN(payload_len + sizeof(struct ip));
470 	iph->ip_id = 0; /* 0 => will be filled automatically by the kernel */
471 	iph->ip_off = 0; /* frag.: first 3 bits=flags=0, last 13 bits=offset */
472 	iph->ip_ttl = 64;//cfg_get(core, core_cfg, udp4_raw_ttl);
473 	iph->ip_p = proto;
474 	iph->ip_src = *from;
475 	iph->ip_dst = *to;
476 	iph->ip_sum = 0;
477 
478 	return 0;
479 }
480 
481 
482 
483 /** send an udp packet over a non-ip_hdrincl raw socket.
484  * @param rsock - raw socket
485  * @param buf - data
486  * @param len - data len
487  * @param from - source address:port (_must_ be non-null, but the ip address
488  *                can be 0, in which case it will be filled by the kernel).
489  * @param to - destination address:port
490  * @return  <0 on error (errno set too), number of bytes sent on success
491  *          (including the udp header => on success len + udpheader size).
492  */
raw_udp4_send(int rsock,char * buf,unsigned int len,sockaddr_storage * from,sockaddr_storage * to)493 int raw_udp4_send(int rsock, char* buf, unsigned int len,
494 		  sockaddr_storage* from, sockaddr_storage* to)
495 {
496 	struct msghdr snd_msg;
497 	struct cmsghdr* cmsg;
498 #ifdef IP_PKTINFO
499 	struct in_pktinfo* snd_pktinfo;
500 #endif /* IP_PKTINFO */
501 	struct iovec iov[2];
502 	struct udphdr udp_hdr;
503 	char msg_ctrl_snd_buf[1024];
504 	int ret;
505 
506 	memset(&snd_msg, 0, sizeof(snd_msg));
507 	snd_msg.msg_name=SAv4(to);
508 	snd_msg.msg_namelen=SA_len(to);
509 	snd_msg.msg_iov=&iov[0];
510 	/* prepare udp header */
511 	mk_udp_hdr(&udp_hdr, from, to, (unsigned char*)buf, len, 1);
512 	iov[0].iov_base=(char*)&udp_hdr;
513 	iov[0].iov_len=sizeof(udp_hdr);
514 	iov[1].iov_base=buf;
515 	iov[1].iov_len=len;
516 	snd_msg.msg_iovlen=2;
517 	snd_msg.msg_control=msg_ctrl_snd_buf;
518 	snd_msg.msg_controllen=sizeof(msg_ctrl_snd_buf);
519 	/* init pktinfo cmsg */
520 	cmsg=CMSG_FIRSTHDR(&snd_msg);
521 	cmsg->cmsg_level=IPPROTO_IP;
522 #ifdef IP_PKTINFO
523 	cmsg->cmsg_type=IP_PKTINFO;
524 	cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_pktinfo));
525 	snd_pktinfo=(struct in_pktinfo*)CMSG_DATA(cmsg);
526 	snd_pktinfo->ipi_ifindex=0;
527 	snd_pktinfo->ipi_spec_dst.s_addr=SAv4(&from)->sin_addr.s_addr;
528 #elif defined (IP_SENDSRCADDR)
529 	cmsg->cmsg_type=IP_SENDSRCADDR;
530 	cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_addr));
531 	memcpy(CMSG_DATA(cmsg), &SAv4(&from)->sin_addr.s_addr,
532 	       sizeof(struct in_addr));
533 #else
534 #ifdef __DragonFly__
535 	errx(1, "missing support for IP_SENDSRCADDR"); /* XXX FIXME */
536 #else
537 #error "no method of setting the source ip supported"
538 #endif
539 #endif /* IP_PKTINFO / IP_SENDSRCADDR */
540 	snd_msg.msg_controllen=cmsg->cmsg_len;
541 	snd_msg.msg_flags=0;
542 	ret=sendmsg(rsock, &snd_msg, 0);
543 	return ret;
544 }
545 
546 
547 
548 /** send an udp packet over an IP_HDRINCL raw socket.
549  * If needed, send several fragments.
550  * @param rsock - raw socket
551  * @param buf - data
552  * @param len - data len
553  * @param from - source address:port (_must_ be non-null, but the ip address
554  *                can be 0, in which case it will be filled by the kernel).
555  * @param to - destination address:port
556  * @param mtu - maximum datagram size (including the ip header, excluding
557  *              link layer headers). Minimum allowed size is 28
558  *               (sizeof(ip_header + udp_header)). If mtu is lower, it will
559  *               be ignored (the packet will be sent un-fragmented).
560  *              0 can be used to disable fragmentation.
561  * @return  <0 on error (-2: datagram too big, -1: check errno),
562  *          number of bytes sent on success
563  *          (including the ip & udp headers =>
564  *               on success len + udpheader + ipheader size).
565  */
raw_iphdr_udp4_send(int rsock,const char * buf,unsigned int len,const sockaddr_storage * from,const sockaddr_storage * to,unsigned short mtu)566 int raw_iphdr_udp4_send(int rsock, const char* buf, unsigned int len,
567 			const sockaddr_storage* from,
568 			const sockaddr_storage* to,
569 			unsigned short mtu)
570 {
571 	struct msghdr snd_msg;
572 	struct iovec iov[2];
573 	struct ip_udp_hdr {
574 		struct ip ip;
575 		struct udphdr udp;
576 	} hdr;
577 	unsigned int totlen;
578 #ifndef RAW_IPHDR_INC_AUTO_FRAG
579 	unsigned int ip_frag_size; /* fragment size */
580 	unsigned int last_frag_extra; /* extra bytes possible in the last frag */
581 	unsigned int ip_payload;
582 	unsigned int last_frag_offs;
583 	void* last_frag_start;
584 	int frg_no;
585 #endif /* RAW_IPHDR_INC_AUTO_FRAG */
586 	int ret;
587 
588 	totlen = len + sizeof(hdr);
589 	if (unlikely(totlen) > 65535)
590 		return -2;
591 	memset(&snd_msg, 0, sizeof(snd_msg));
592 	snd_msg.msg_name=SAv4(to);
593 	snd_msg.msg_namelen=SA_len(to);
594 	snd_msg.msg_iov=&iov[0];
595 	/* prepare the udp & ip headers */
596 	mk_udp_hdr(&hdr.udp, from, to, (unsigned char*)buf, len, 1);
597 	mk_ip_hdr(&hdr.ip, &SAv4(from)->sin_addr, &SAv4(to)->sin_addr,
598 		  len + sizeof(hdr.udp), IPPROTO_UDP);
599 	iov[0].iov_base=(char*)&hdr;
600 	iov[0].iov_len=sizeof(hdr);
601 	snd_msg.msg_iovlen=2;
602 	snd_msg.msg_control=0;
603 	snd_msg.msg_controllen=0;
604 	snd_msg.msg_flags=0;
605 	/* this part changes for different fragments */
606 	/* packets are fragmented if mtu has a valid value (at least an
607 	   IP header + UDP header fit in it) and if the total length is greater
608 	   then the mtu */
609 #ifndef RAW_IPHDR_INC_AUTO_FRAG
610 	if (likely(totlen <= mtu || mtu <= sizeof(hdr))) {
611 #endif /* RAW_IPHDR_INC_AUTO_FRAG */
612 	  iov[1].iov_base=(void*)buf;
613 	  iov[1].iov_len=len;
614 	  ret=sendmsg(rsock, &snd_msg, 0);
615 #ifndef RAW_IPHDR_INC_AUTO_FRAG
616 	} else {
617 	  int bytes_sent;
618 	  ip_payload = len + sizeof(hdr.udp);
619 	  /* a fragment offset must be a multiple of 8 => its size must
620 	     also be a multiple of 8, except for the last fragment */
621 	  ip_frag_size = (mtu -sizeof(hdr.ip)) & (~7);
622 	  last_frag_extra = (mtu - sizeof(hdr.ip)) & 7; /* rest */
623 	  frg_no = ip_payload / ip_frag_size +
624 	    ((ip_payload % ip_frag_size) > last_frag_extra);
625 	  /*ip_last_frag_size = ip_payload % frag_size +
626 	    ((ip_payload % frag_size) <= last_frag_extra) *
627 	    ip_frag_size; */
628 	  last_frag_offs = (frg_no - 1) * ip_frag_size;
629 	  /* if we are here mtu => sizeof(ip_h+udp_h) && payload > mtu
630 	     => last_frag_offs >= sizeof(hdr.udp) */
631 	  last_frag_start = (void*)(buf + last_frag_offs - sizeof(hdr.udp));
632 	  /* random id, should be != 0
633 	     (if 0 the kernel will fill it) */
634 	  hdr.ip.ip_id = 0; //fastrand_max(65534) + 1;
635 	  /* send the first fragment */
636 	  iov[1].iov_base=(void*)buf;
637 	  /* ip_frag_size >= sizeof(hdr.udp) because we are here only
638 	     if mtu >= sizeof(hdr.ip) + sizeof(hdr.udp) */
639 	  iov[1].iov_len=ip_frag_size - sizeof(hdr.udp);
640 	  hdr.ip.ip_len = RAW_IPHDR_IP_LEN(ip_frag_size + sizeof(hdr.ip));
641 	  hdr.ip.ip_off = RAW_IPHDR_IP_OFF(0x2000); /* set MF */
642 	  ret=sendmsg(rsock, &snd_msg, 0);
643 	  if (unlikely(ret < 0))
644 	    goto end;
645 	  bytes_sent = ret;
646 	  /* all the other fragments, include only the ip header */
647 	  iov[0].iov_len = sizeof(hdr.ip);
648 	  iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
649 	  /* fragments between the first and the last */
650 	  while(unlikely(iov[1].iov_base < last_frag_start)) {
651 	    iov[1].iov_len = ip_frag_size;
652 	    hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
653 	    /* set MF  */
654 	    hdr.ip.ip_off =
655 	      RAW_IPHDR_IP_OFF((unsigned short)
656 			       (((char*)iov[1].iov_base
657 				 - (char*)buf + sizeof(hdr.udp))
658 				/ 8) | 0x2000 );
659 	    ret=sendmsg(rsock, &snd_msg, 0);
660 	    if (unlikely(ret < 0))
661 	      goto end;
662 	    bytes_sent+=ret;
663 	    iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
664 	  }
665 	  /* last fragment */
666 	  iov[1].iov_len = buf + len - (char*)iov[1].iov_base;
667 	  hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
668 	  /* don't set MF (last fragment) */
669 	  hdr.ip.ip_off = RAW_IPHDR_IP_OFF((unsigned short)
670 					   (((char*)iov[1].iov_base
671 					     - (char*)buf + sizeof(hdr.udp))
672 					    / 8) );
673 	  ret=sendmsg(rsock, &snd_msg, 0);
674 	  if (unlikely(ret < 0))
675 	    goto end;
676 	  ret+=bytes_sent;
677 	}
678 end:
679 #endif /* RAW_IPHDR_INC_AUTO_FRAG */
680 	return ret;
681 }
682