1 /* 2 * COPYRIGHT: See COPYING in the top level directory 3 * PROJECT: ReactOS TCP/IP protocol driver 4 * FILE: include/tcpcore.h 5 * PURPOSE: Transmission Control Protocol definitions 6 * REVISIONS: 7 * CSH 01/01-2003 Ported from linux kernel 2.4.20 8 */ 9 10 /* 11 * INET An implementation of the TCP/IP protocol suite for the LINUX 12 * operating system. INET is implemented using the BSD Socket 13 * interface as the means of communication with the user level. 14 * 15 * Definitions for the TCP module. 16 * 17 * Version: @(#)tcp.h 1.0.5 05/23/93 18 * 19 * Authors: Ross Biro, <bir7@leland.Stanford.Edu> 20 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 21 * 22 * This program is free software; you can redistribute it and/or 23 * modify it under the terms of the GNU General Public License 24 * as published by the Free Software Foundation; either version 25 * 2 of the License, or (at your option) any later version. 26 */ 27 28 #pragma once 29 30 #include "tcpdef.h" 31 32 33 struct socket; 34 35 36 37 #if 1 /* skbuff */ 38 39 #define HAVE_ALLOC_SKB /* For the drivers to know */ 40 #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ 41 #define SLAB_SKB /* Slabified skbuffs */ 42 43 #define CHECKSUM_NONE 0 44 #define CHECKSUM_HW 1 45 #define CHECKSUM_UNNECESSARY 2 46 47 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1)) 48 #define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1)) 49 #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X),0)) 50 #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0,2)) 51 52 /* A. Checksumming of received packets by device. 53 * 54 * NONE: device failed to checksum this packet. 55 * skb->csum is undefined. 56 * 57 * UNNECESSARY: device parsed packet and wouldbe verified checksum. 58 * skb->csum is undefined. 59 * It is bad option, but, unfortunately, many of vendors do this. 60 * Apparently with secret goal to sell you new device, when you 61 * will add new protocol to your host. F.e. 
IPv6. 8) 62 * 63 * HW: the most generic way. Device supplied checksum of _all_ 64 * the packet as seen by netif_rx in skb->csum. 65 * NOTE: Even if device supports only some protocols, but 66 * is able to produce some skb->csum, it MUST use HW, 67 * not UNNECESSARY. 68 * 69 * B. Checksumming on output. 70 * 71 * NONE: skb is checksummed by protocol or csum is not required. 72 * 73 * HW: device is required to csum packet as seen by hard_start_xmit 74 * from skb->h.raw to the end and to record the checksum 75 * at skb->h.raw+skb->csum. 76 * 77 * Device must show its capabilities in dev->features, set 78 * at device setup time. 79 * NETIF_F_HW_CSUM - it is clever device, it is able to checksum 80 * everything. 81 * NETIF_F_NO_CSUM - loopback or reliable single hop media. 82 * NETIF_F_IP_CSUM - device is dumb. It is able to csum only 83 * TCP/UDP over IPv4. Sigh. Vendors like this 84 * way by an unknown reason. Though, see comment above 85 * about CHECKSUM_UNNECESSARY. 8) 86 * 87 * Any questions? No questions, good. --ANK 88 */ 89 90 #ifdef __i386__ 91 #define NET_CALLER(arg) (*(((void**)&arg)-1)) 92 #else 93 #define NET_CALLER(arg) __builtin_return_address(0) 94 #endif 95 96 #ifdef CONFIG_NETFILTER 97 struct nf_conntrack { 98 atomic_t use; 99 void (*destroy)(struct nf_conntrack *); 100 }; 101 102 struct nf_ct_info { 103 struct nf_conntrack *master; 104 }; 105 #endif 106 107 struct sk_buff_head { 108 /* These two members must be first. */ 109 struct sk_buff * next; 110 struct sk_buff * prev; 111 112 __u32 qlen; 113 spinlock_t lock; 114 }; 115 116 struct sk_buff; 117 118 #define MAX_SKB_FRAGS 6 119 120 typedef struct skb_frag_struct skb_frag_t; 121 122 struct skb_frag_struct 123 { 124 struct page *page; 125 __u16 page_offset; 126 __u16 size; 127 }; 128 129 /* This data is invariant across clones and lives at 130 * the end of the header data, ie. at skb->end. 
131 */ 132 struct skb_shared_info { 133 atomic_t dataref; 134 unsigned int nr_frags; 135 struct sk_buff *frag_list; 136 skb_frag_t frags[MAX_SKB_FRAGS]; 137 }; 138 139 struct sk_buff { 140 /* These two members must be first. */ 141 struct sk_buff * next; /* Next buffer in list */ 142 struct sk_buff * prev; /* Previous buffer in list */ 143 144 struct sk_buff_head * list; /* List we are on */ 145 struct sock *sk; /* Socket we are owned by */ 146 struct timeval stamp; /* Time we arrived */ 147 struct net_device *dev; /* Device we arrived on/are leaving by */ 148 149 /* Transport layer header */ 150 union 151 { 152 struct tcphdr *th; 153 struct udphdr *uh; 154 struct icmphdr *icmph; 155 struct igmphdr *igmph; 156 struct iphdr *ipiph; 157 struct spxhdr *spxh; 158 unsigned char *raw; 159 } h; 160 161 /* Network layer header */ 162 union 163 { 164 struct iphdr *iph; 165 struct ipv6hdr *ipv6h; 166 struct arphdr *arph; 167 struct ipxhdr *ipxh; 168 unsigned char *raw; 169 } nh; 170 171 /* Link layer header */ 172 union 173 { 174 struct ethhdr *ethernet; 175 unsigned char *raw; 176 } mac; 177 178 struct dst_entry *dst; 179 180 /* 181 * This is the control buffer. It is free to use for every 182 * layer. Please put your private variables there. If you 183 * want to keep them across layers you have to do a skb_clone() 184 * first. This is owned by whoever has the skb queued ATM. 185 */ 186 char cb[48]; 187 188 unsigned int len; /* Length of actual data */ 189 unsigned int data_len; 190 unsigned int csum; /* Checksum */ 191 unsigned char __unused, /* Dead field, may be reused */ 192 cloned, /* head may be cloned (check refcnt to be sure). */ 193 pkt_type, /* Packet class */ 194 ip_summed; /* Driver fed us an IP checksum */ 195 __u32 priority; /* Packet queueing priority */ 196 atomic_t users; /* User count - see datagram.c,tcp.c */ 197 unsigned short protocol; /* Packet protocol from driver. 
*/ 198 unsigned short security; /* Security level of packet */ 199 unsigned int truesize; /* Buffer size */ 200 201 unsigned char *head; /* Head of buffer */ 202 unsigned char *data; /* Data head pointer */ 203 unsigned char *tail; /* Tail pointer */ 204 unsigned char *end; /* End pointer */ 205 206 void (*destructor)(struct sk_buff *); /* Destruct function */ 207 #ifdef CONFIG_NETFILTER 208 /* Can be used for communication between hooks. */ 209 unsigned long nfmark; 210 /* Cache info */ 211 __u32 nfcache; 212 /* Associated connection, if any */ 213 struct nf_ct_info *nfct; 214 #ifdef CONFIG_NETFILTER_DEBUG 215 unsigned int nf_debug; 216 #endif 217 #endif /*CONFIG_NETFILTER*/ 218 219 #if defined(CONFIG_HIPPI) 220 union{ 221 __u32 ifield; 222 } private; 223 #endif 224 225 #ifdef CONFIG_NET_SCHED 226 __u32 tc_index; /* traffic control index */ 227 #endif 228 }; 229 230 #define SK_WMEM_MAX 65535 231 #define SK_RMEM_MAX 65535 232 233 #if 1 234 //#ifdef __KERNEL__ 235 /* 236 * Handling routines are only of interest to the kernel 237 */ 238 239 extern void __kfree_skb(struct sk_buff *skb); 240 extern struct sk_buff * alloc_skb(unsigned int size, int priority); 241 extern void kfree_skbmem(struct sk_buff *skb); 242 extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); 243 extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); 244 extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask); 245 extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask); 246 extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); 247 extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb, 248 int newheadroom, 249 int newtailroom, 250 int priority); 251 #define dev_kfree_skb(a) kfree_skb(a) 252 extern void skb_over_panic(struct sk_buff *skb, int len, void *here); 253 extern void skb_under_panic(struct sk_buff *skb, int len, void *here); 254 255 /* Internal */ 256 #define 
skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 257 258 /** 259 * skb_queue_empty - check if a queue is empty 260 * @list: queue head 261 * 262 * Returns true if the queue is empty, false otherwise. 263 */ 264 265 static __inline int skb_queue_empty(struct sk_buff_head *list) 266 { 267 return (list->next == (struct sk_buff *) list); 268 } 269 270 /** 271 * skb_get - reference buffer 272 * @skb: buffer to reference 273 * 274 * Makes another reference to a socket buffer and returns a pointer 275 * to the buffer. 276 */ 277 278 static __inline struct sk_buff *skb_get(struct sk_buff *skb) 279 { 280 atomic_inc(&skb->users); 281 return skb; 282 } 283 284 /* 285 * If users==1, we are the only owner and are can avoid redundant 286 * atomic change. 287 */ 288 289 /** 290 * kfree_skb - free an sk_buff 291 * @skb: buffer to free 292 * 293 * Drop a reference to the buffer and free it if the usage count has 294 * hit zero. 295 */ 296 297 static __inline void kfree_skb(struct sk_buff *skb) 298 { 299 if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) 300 __kfree_skb(skb); 301 } 302 303 /* Use this if you didn't touch the skb state [for fast switching] */ 304 static __inline void kfree_skb_fast(struct sk_buff *skb) 305 { 306 if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) 307 kfree_skbmem(skb); 308 } 309 310 /** 311 * skb_cloned - is the buffer a clone 312 * @skb: buffer to check 313 * 314 * Returns true if the buffer was generated with skb_clone() and is 315 * one of multiple shared copies of the buffer. Cloned buffers are 316 * shared data so must not be written to under normal circumstances. 317 */ 318 319 static __inline int skb_cloned(struct sk_buff *skb) 320 { 321 return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1; 322 } 323 324 /** 325 * skb_shared - is the buffer shared 326 * @skb: buffer to check 327 * 328 * Returns true if more than one person has a reference to this 329 * buffer. 
330 */ 331 332 static __inline int skb_shared(struct sk_buff *skb) 333 { 334 return (atomic_read(&skb->users) != 1); 335 } 336 337 /** 338 * skb_share_check - check if buffer is shared and if so clone it 339 * @skb: buffer to check 340 * @pri: priority for memory allocation 341 * 342 * If the buffer is shared the buffer is cloned and the old copy 343 * drops a reference. A new clone with a single reference is returned. 344 * If the buffer is not shared the original buffer is returned. When 345 * being called from interrupt status or with spinlocks held pri must 346 * be GFP_ATOMIC. 347 * 348 * NULL is returned on a memory allocation failure. 349 */ 350 351 static __inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) 352 { 353 if (skb_shared(skb)) { 354 struct sk_buff *nskb; 355 nskb = skb_clone(skb, pri); 356 kfree_skb(skb); 357 return nskb; 358 } 359 return skb; 360 } 361 362 363 /* 364 * Copy shared buffers into a new sk_buff. We effectively do COW on 365 * packets to handle cases where we have a local reader and forward 366 * and a couple of other messy ones. The normal one is tcpdumping 367 * a packet thats being forwarded. 368 */ 369 370 /** 371 * skb_unshare - make a copy of a shared buffer 372 * @skb: buffer to check 373 * @pri: priority for memory allocation 374 * 375 * If the socket buffer is a clone then this function creates a new 376 * copy of the data, drops a reference count on the old copy and returns 377 * the new copy with the reference count at 1. If the buffer is not a clone 378 * the original buffer is returned. When called with a spinlock held or 379 * from interrupt state @pri must be %GFP_ATOMIC 380 * 381 * %NULL is returned on a memory allocation failure. 
382 */ 383 384 static __inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri) 385 { 386 struct sk_buff *nskb; 387 if(!skb_cloned(skb)) 388 return skb; 389 nskb=skb_copy(skb, pri); 390 kfree_skb(skb); /* Free our shared copy */ 391 return nskb; 392 } 393 394 /** 395 * skb_peek 396 * @list_: list to peek at 397 * 398 * Peek an &sk_buff. Unlike most other operations you _MUST_ 399 * be careful with this one. A peek leaves the buffer on the 400 * list and someone else may run off with it. You must hold 401 * the appropriate locks or have a private queue to do this. 402 * 403 * Returns %NULL for an empty list or a pointer to the head element. 404 * The reference count is not incremented and the reference is therefore 405 * volatile. Use with caution. 406 */ 407 408 static __inline struct sk_buff *skb_peek(struct sk_buff_head *list_) 409 { 410 struct sk_buff *list = ((struct sk_buff *)list_)->next; 411 if (list == (struct sk_buff *)list_) 412 list = NULL; 413 return list; 414 } 415 416 /** 417 * skb_peek_tail 418 * @list_: list to peek at 419 * 420 * Peek an &sk_buff. Unlike most other operations you _MUST_ 421 * be careful with this one. A peek leaves the buffer on the 422 * list and someone else may run off with it. You must hold 423 * the appropriate locks or have a private queue to do this. 424 * 425 * Returns %NULL for an empty list or a pointer to the tail element. 426 * The reference count is not incremented and the reference is therefore 427 * volatile. Use with caution. 428 */ 429 430 static __inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) 431 { 432 struct sk_buff *list = ((struct sk_buff *)list_)->prev; 433 if (list == (struct sk_buff *)list_) 434 list = NULL; 435 return list; 436 } 437 438 /** 439 * skb_queue_len - get queue length 440 * @list_: list to measure 441 * 442 * Return the length of an &sk_buff queue. 
443 */ 444 445 static __inline __u32 skb_queue_len(struct sk_buff_head *list_) 446 { 447 return(list_->qlen); 448 } 449 450 static __inline void skb_queue_head_init(struct sk_buff_head *list) 451 { 452 spin_lock_init(&list->lock); 453 list->prev = (struct sk_buff *)list; 454 list->next = (struct sk_buff *)list; 455 list->qlen = 0; 456 } 457 458 /* 459 * Insert an sk_buff at the start of a list. 460 * 461 * The "__skb_xxxx()" functions are the non-atomic ones that 462 * can only be called with interrupts disabled. 463 */ 464 465 /** 466 * __skb_queue_head - queue a buffer at the list head 467 * @list: list to use 468 * @newsk: buffer to queue 469 * 470 * Queue a buffer at the start of a list. This function takes no locks 471 * and you must therefore hold required locks before calling it. 472 * 473 * A buffer cannot be placed on two lists at the same time. 474 */ 475 476 static __inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) 477 { 478 struct sk_buff *prev, *next; 479 480 newsk->list = list; 481 list->qlen++; 482 prev = (struct sk_buff *)list; 483 next = prev->next; 484 newsk->next = next; 485 newsk->prev = prev; 486 next->prev = newsk; 487 prev->next = newsk; 488 } 489 490 491 /** 492 * skb_queue_head - queue a buffer at the list head 493 * @list: list to use 494 * @newsk: buffer to queue 495 * 496 * Queue a buffer at the start of the list. This function takes the 497 * list lock and can be used safely with other locking &sk_buff functions 498 * safely. 499 * 500 * A buffer cannot be placed on two lists at the same time. 
501 */ 502 503 static __inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) 504 { 505 unsigned long flags; 506 507 spin_lock_irqsave(&list->lock, flags); 508 __skb_queue_head(list, newsk); 509 spin_unlock_irqrestore(&list->lock, flags); 510 } 511 512 /** 513 * __skb_queue_tail - queue a buffer at the list tail 514 * @list: list to use 515 * @newsk: buffer to queue 516 * 517 * Queue a buffer at the end of a list. This function takes no locks 518 * and you must therefore hold required locks before calling it. 519 * 520 * A buffer cannot be placed on two lists at the same time. 521 */ 522 523 524 static __inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) 525 { 526 struct sk_buff *prev, *next; 527 528 newsk->list = list; 529 list->qlen++; 530 next = (struct sk_buff *)list; 531 prev = next->prev; 532 newsk->next = next; 533 newsk->prev = prev; 534 next->prev = newsk; 535 prev->next = newsk; 536 } 537 538 /** 539 * skb_queue_tail - queue a buffer at the list tail 540 * @list: list to use 541 * @newsk: buffer to queue 542 * 543 * Queue a buffer at the tail of the list. This function takes the 544 * list lock and can be used safely with other locking &sk_buff functions 545 * safely. 546 * 547 * A buffer cannot be placed on two lists at the same time. 548 */ 549 550 static __inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) 551 { 552 unsigned long flags; 553 554 spin_lock_irqsave(&list->lock, flags); 555 __skb_queue_tail(list, newsk); 556 spin_unlock_irqrestore(&list->lock, flags); 557 } 558 559 /** 560 * __skb_dequeue - remove from the head of the queue 561 * @list: list to dequeue from 562 * 563 * Remove the head of the list. This function does not take any locks 564 * so must be used with appropriate locks held only. The head item is 565 * returned or %NULL if the list is empty. 
566 */ 567 568 static __inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) 569 { 570 struct sk_buff *next, *prev, *result; 571 572 prev = (struct sk_buff *) list; 573 next = prev->next; 574 result = NULL; 575 if (next != prev) { 576 result = next; 577 next = next->next; 578 list->qlen--; 579 next->prev = prev; 580 prev->next = next; 581 result->next = NULL; 582 result->prev = NULL; 583 result->list = NULL; 584 } 585 return result; 586 } 587 588 /** 589 * skb_dequeue - remove from the head of the queue 590 * @list: list to dequeue from 591 * 592 * Remove the head of the list. The list lock is taken so the function 593 * may be used safely with other locking list functions. The head item is 594 * returned or %NULL if the list is empty. 595 */ 596 597 static __inline struct sk_buff *skb_dequeue(struct sk_buff_head *list) 598 { 599 unsigned long flags; 600 struct sk_buff *result; 601 602 spin_lock_irqsave(&list->lock, flags); 603 result = __skb_dequeue(list); 604 spin_unlock_irqrestore(&list->lock, flags); 605 return result; 606 } 607 608 /* 609 * Insert a packet on a list. 610 */ 611 612 static __inline void __skb_insert(struct sk_buff *newsk, 613 struct sk_buff * prev, struct sk_buff *next, 614 struct sk_buff_head * list) 615 { 616 newsk->next = next; 617 newsk->prev = prev; 618 next->prev = newsk; 619 prev->next = newsk; 620 newsk->list = list; 621 list->qlen++; 622 } 623 624 /** 625 * skb_insert - insert a buffer 626 * @old: buffer to insert before 627 * @newsk: buffer to insert 628 * 629 * Place a packet before a given packet in a list. The list locks are taken 630 * and this function is atomic with respect to other list locked calls 631 * A buffer cannot be placed on two lists at the same time. 
632 */ 633 634 static __inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk) 635 { 636 unsigned long flags; 637 638 spin_lock_irqsave(&old->list->lock, flags); 639 __skb_insert(newsk, old->prev, old, old->list); 640 spin_unlock_irqrestore(&old->list->lock, flags); 641 } 642 643 /* 644 * Place a packet after a given packet in a list. 645 */ 646 647 static __inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) 648 { 649 __skb_insert(newsk, old, old->next, old->list); 650 } 651 652 /** 653 * skb_append - append a buffer 654 * @old: buffer to insert after 655 * @newsk: buffer to insert 656 * 657 * Place a packet after a given packet in a list. The list locks are taken 658 * and this function is atomic with respect to other list locked calls. 659 * A buffer cannot be placed on two lists at the same time. 660 */ 661 662 663 static __inline void skb_append(struct sk_buff *old, struct sk_buff *newsk) 664 { 665 unsigned long flags; 666 667 spin_lock_irqsave(&old->list->lock, flags); 668 __skb_append(old, newsk); 669 spin_unlock_irqrestore(&old->list->lock, flags); 670 } 671 672 /* 673 * remove sk_buff from list. _Must_ be called atomically, and with 674 * the list known.. 675 */ 676 677 static __inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) 678 { 679 struct sk_buff * next, * prev; 680 681 list->qlen--; 682 next = skb->next; 683 prev = skb->prev; 684 skb->next = NULL; 685 skb->prev = NULL; 686 skb->list = NULL; 687 next->prev = prev; 688 prev->next = next; 689 } 690 691 /** 692 * skb_unlink - remove a buffer from a list 693 * @skb: buffer to remove 694 * 695 * Place a packet after a given packet in a list. The list locks are taken 696 * and this function is atomic with respect to other list locked calls 697 * 698 * Works even without knowing the list it is sitting on, which can be 699 * handy at times. It also means that THE LIST MUST EXIST when you 700 * unlink. 
Thus a list must have its contents unlinked before it is 701 * destroyed. 702 */ 703 704 static __inline void skb_unlink(struct sk_buff *skb) 705 { 706 struct sk_buff_head *list = skb->list; 707 708 if(list) { 709 unsigned long flags; 710 711 spin_lock_irqsave(&list->lock, flags); 712 if(skb->list == list) 713 __skb_unlink(skb, skb->list); 714 spin_unlock_irqrestore(&list->lock, flags); 715 } 716 } 717 718 /* XXX: more streamlined implementation */ 719 720 /** 721 * __skb_dequeue_tail - remove from the tail of the queue 722 * @list: list to dequeue from 723 * 724 * Remove the tail of the list. This function does not take any locks 725 * so must be used with appropriate locks held only. The tail item is 726 * returned or %NULL if the list is empty. 727 */ 728 729 static __inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) 730 { 731 struct sk_buff *skb = skb_peek_tail(list); 732 if (skb) 733 __skb_unlink(skb, list); 734 return skb; 735 } 736 737 /** 738 * skb_dequeue - remove from the head of the queue 739 * @list: list to dequeue from 740 * 741 * Remove the head of the list. The list lock is taken so the function 742 * may be used safely with other locking list functions. The tail item is 743 * returned or %NULL if the list is empty. 
744 */ 745 746 static __inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) 747 { 748 unsigned long flags; 749 struct sk_buff *result; 750 751 spin_lock_irqsave(&list->lock, flags); 752 result = __skb_dequeue_tail(list); 753 spin_unlock_irqrestore(&list->lock, flags); 754 return result; 755 } 756 757 static __inline int skb_is_nonlinear(const struct sk_buff *skb) 758 { 759 return skb->data_len; 760 } 761 762 static __inline int skb_headlen(const struct sk_buff *skb) 763 { 764 return skb->len - skb->data_len; 765 } 766 767 #define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) out_of_line_bug(); } while (0) 768 #define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) out_of_line_bug(); } while (0) 769 #define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) out_of_line_bug(); } while (0) 770 771 /* 772 * Add data to an sk_buff 773 */ 774 775 static __inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) 776 { 777 unsigned char *tmp=skb->tail; 778 SKB_LINEAR_ASSERT(skb); 779 skb->tail+=len; 780 skb->len+=len; 781 return tmp; 782 } 783 784 /** 785 * skb_put - add data to a buffer 786 * @skb: buffer to use 787 * @len: amount of data to add 788 * 789 * This function extends the used data area of the buffer. If this would 790 * exceed the total buffer size the kernel will panic. A pointer to the 791 * first byte of the extra data is returned. 
792 */ 793 794 static __inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) 795 { 796 #if 0 797 unsigned char *tmp=skb->tail; 798 SKB_LINEAR_ASSERT(skb); 799 skb->tail+=len; 800 skb->len+=len; 801 if(skb->tail>skb->end) { 802 skb_over_panic(skb, len, current_text_addr()); 803 } 804 return tmp; 805 #else 806 return NULL; 807 #endif 808 } 809 810 static __inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) 811 { 812 skb->data-=len; 813 skb->len+=len; 814 return skb->data; 815 } 816 817 /** 818 * skb_push - add data to the start of a buffer 819 * @skb: buffer to use 820 * @len: amount of data to add 821 * 822 * This function extends the used data area of the buffer at the buffer 823 * start. If this would exceed the total buffer headroom the kernel will 824 * panic. A pointer to the first byte of the extra data is returned. 825 */ 826 827 static __inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) 828 { 829 #if 0 830 skb->data-=len; 831 skb->len+=len; 832 if(skb->data<skb->head) { 833 skb_under_panic(skb, len, current_text_addr()); 834 } 835 return skb->data; 836 #else 837 return NULL; 838 #endif 839 } 840 841 static __inline char *__skb_pull(struct sk_buff *skb, unsigned int len) 842 { 843 skb->len-=len; 844 if (skb->len < skb->data_len) 845 out_of_line_bug(); 846 return skb->data+=len; 847 } 848 849 /** 850 * skb_pull - remove data from the start of a buffer 851 * @skb: buffer to use 852 * @len: amount of data to remove 853 * 854 * This function removes data from the start of a buffer, returning 855 * the memory to the headroom. A pointer to the next data in the buffer 856 * is returned. Once the data has been pulled future pushes will overwrite 857 * the old data. 
858 */ 859 860 static __inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len) 861 { 862 if (len > skb->len) 863 return NULL; 864 return __skb_pull(skb,len); 865 } 866 867 extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta); 868 869 static __inline char *__pskb_pull(struct sk_buff *skb, unsigned int len) 870 { 871 if (len > skb_headlen(skb) && 872 __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL) 873 return NULL; 874 skb->len -= len; 875 return skb->data += len; 876 } 877 878 static __inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len) 879 { 880 if (len > skb->len) 881 return NULL; 882 return __pskb_pull(skb,len); 883 } 884 885 static __inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) 886 { 887 if (len <= skb_headlen(skb)) 888 return 1; 889 if (len > skb->len) 890 return 0; 891 return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL); 892 } 893 894 /** 895 * skb_headroom - bytes at buffer head 896 * @skb: buffer to check 897 * 898 * Return the number of bytes of free space at the head of an &sk_buff. 899 */ 900 901 static __inline int skb_headroom(const struct sk_buff *skb) 902 { 903 return skb->data-skb->head; 904 } 905 906 /** 907 * skb_tailroom - bytes at buffer end 908 * @skb: buffer to check 909 * 910 * Return the number of bytes of free space at the tail of an sk_buff 911 */ 912 913 static __inline int skb_tailroom(const struct sk_buff *skb) 914 { 915 return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail; 916 } 917 918 /** 919 * skb_reserve - adjust headroom 920 * @skb: buffer to alter 921 * @len: bytes to move 922 * 923 * Increase the headroom of an empty &sk_buff by reducing the tail 924 * room. This is only allowed for an empty buffer. 
925 */ 926 927 static __inline void skb_reserve(struct sk_buff *skb, unsigned int len) 928 { 929 skb->data+=len; 930 skb->tail+=len; 931 } 932 933 extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); 934 935 static __inline void __skb_trim(struct sk_buff *skb, unsigned int len) 936 { 937 if (!skb->data_len) { 938 skb->len = len; 939 skb->tail = skb->data+len; 940 } else { 941 ___pskb_trim(skb, len, 0); 942 } 943 } 944 945 /** 946 * skb_trim - remove end from a buffer 947 * @skb: buffer to alter 948 * @len: new length 949 * 950 * Cut the length of a buffer down by removing data from the tail. If 951 * the buffer is already under the length specified it is not modified. 952 */ 953 954 static __inline void skb_trim(struct sk_buff *skb, unsigned int len) 955 { 956 if (skb->len > len) { 957 __skb_trim(skb, len); 958 } 959 } 960 961 962 static __inline int __pskb_trim(struct sk_buff *skb, unsigned int len) 963 { 964 if (!skb->data_len) { 965 skb->len = len; 966 skb->tail = skb->data+len; 967 return 0; 968 } else { 969 return ___pskb_trim(skb, len, 1); 970 } 971 } 972 973 static __inline int pskb_trim(struct sk_buff *skb, unsigned int len) 974 { 975 if (len < skb->len) 976 return __pskb_trim(skb, len); 977 return 0; 978 } 979 980 /** 981 * skb_orphan - orphan a buffer 982 * @skb: buffer to orphan 983 * 984 * If a buffer currently has an owner then we call the owner's 985 * destructor function and make the @skb unowned. The buffer continues 986 * to exist but is no longer charged to its former owner. 987 */ 988 989 990 static __inline void skb_orphan(struct sk_buff *skb) 991 { 992 if (skb->destructor) 993 skb->destructor(skb); 994 skb->destructor = NULL; 995 skb->sk = NULL; 996 } 997 998 /** 999 * skb_purge - empty a list 1000 * @list: list to empty 1001 * 1002 * Delete all buffers on an &sk_buff list. Each buffer is removed from 1003 * the list and one reference dropped. 
This function takes the list 1004 * lock and is atomic with respect to other list locking functions. 1005 */ 1006 1007 1008 static __inline void skb_queue_purge(struct sk_buff_head *list) 1009 { 1010 struct sk_buff *skb; 1011 while ((skb=skb_dequeue(list))!=NULL) 1012 kfree_skb(skb); 1013 } 1014 1015 /** 1016 * __skb_purge - empty a list 1017 * @list: list to empty 1018 * 1019 * Delete all buffers on an &sk_buff list. Each buffer is removed from 1020 * the list and one reference dropped. This function does not take the 1021 * list lock and the caller must hold the relevant locks to use it. 1022 */ 1023 1024 1025 static __inline void __skb_queue_purge(struct sk_buff_head *list) 1026 { 1027 struct sk_buff *skb; 1028 while ((skb=__skb_dequeue(list))!=NULL) 1029 kfree_skb(skb); 1030 } 1031 1032 /** 1033 * __dev_alloc_skb - allocate an skbuff for sending 1034 * @length: length to allocate 1035 * @gfp_mask: get_free_pages mask, passed to alloc_skb 1036 * 1037 * Allocate a new &sk_buff and assign it a usage count of one. The 1038 * buffer has unspecified headroom built in. Users should allocate 1039 * the headroom they think they need without accounting for the 1040 * built in space. The built in space is used for optimisations. 1041 * 1042 * %NULL is returned in there is no free memory. 1043 */ 1044 1045 static __inline struct sk_buff *__dev_alloc_skb(unsigned int length, 1046 int gfp_mask) 1047 { 1048 struct sk_buff *skb; 1049 1050 skb = alloc_skb(length+16, gfp_mask); 1051 if (skb) 1052 skb_reserve(skb,16); 1053 return skb; 1054 } 1055 1056 /** 1057 * dev_alloc_skb - allocate an skbuff for sending 1058 * @length: length to allocate 1059 * 1060 * Allocate a new &sk_buff and assign it a usage count of one. The 1061 * buffer has unspecified headroom built in. Users should allocate 1062 * the headroom they think they need without accounting for the 1063 * built in space. The built in space is used for optimisations. 
1064 * 1065 * %NULL is returned in there is no free memory. Although this function 1066 * allocates memory it can be called from an interrupt. 1067 */ 1068 1069 static __inline struct sk_buff *dev_alloc_skb(unsigned int length) 1070 { 1071 #if 0 1072 return __dev_alloc_skb(length, GFP_ATOMIC); 1073 #else 1074 return NULL; 1075 #endif 1076 } 1077 1078 /** 1079 * skb_cow - copy header of skb when it is required 1080 * @skb: buffer to cow 1081 * @headroom: needed headroom 1082 * 1083 * If the skb passed lacks sufficient headroom or its data part 1084 * is shared, data is reallocated. If reallocation fails, an error 1085 * is returned and original skb is not changed. 1086 * 1087 * The result is skb with writable area skb->head...skb->tail 1088 * and at least @headroom of space at head. 1089 */ 1090 1091 static __inline int 1092 skb_cow(struct sk_buff *skb, unsigned int headroom) 1093 { 1094 #if 0 1095 int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); 1096 1097 if (delta < 0) 1098 delta = 0; 1099 1100 if (delta || skb_cloned(skb)) 1101 return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC); 1102 return 0; 1103 #else 1104 return 0; 1105 #endif 1106 } 1107 1108 /** 1109 * skb_linearize - convert paged skb to linear one 1110 * @skb: buffer to linarize 1111 * @gfp: allocation mode 1112 * 1113 * If there is no free memory -ENOMEM is returned, otherwise zero 1114 * is returned and the old skb data released. 
*/ 1115 int skb_linearize(struct sk_buff *skb, int gfp); 1116 1117 static __inline void *kmap_skb_frag(const skb_frag_t *frag) 1118 { 1119 #if 0 1120 #ifdef CONFIG_HIGHMEM 1121 if (in_irq()) 1122 out_of_line_bug(); 1123 1124 local_bh_disable(); 1125 #endif 1126 return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); 1127 #else 1128 return NULL; 1129 #endif 1130 } 1131 1132 static __inline void kunmap_skb_frag(void *vaddr) 1133 { 1134 #if 0 1135 kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); 1136 #ifdef CONFIG_HIGHMEM 1137 local_bh_enable(); 1138 #endif 1139 #endif 1140 } 1141 1142 #define skb_queue_walk(queue, skb) \ 1143 for (skb = (queue)->next; \ 1144 (skb != (struct sk_buff *)(queue)); \ 1145 skb=skb->next) 1146 1147 1148 extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err); 1149 extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); 1150 extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size); 1151 extern int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size); 1152 extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump); 1153 extern int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov); 1154 extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb); 1155 1156 extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum); 1157 extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); 1158 extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum); 1159 extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); 1160 1161 extern void skb_init(void); 1162 extern void skb_add_mtu(int mtu); 1163 1164 #ifdef CONFIG_NETFILTER 1165 static __inline void 1166 
nf_conntrack_put(struct nf_ct_info *nfct)
{
    /* Drop one reference on the master record; when atomic_dec_and_test()
     * returns non-zero, invoke the master's destroy callback. A NULL
     * nfct is ignored. */
    if (nfct && atomic_dec_and_test(&nfct->master->use))
        nfct->master->destroy(nfct->master);
}
/* Take one additional reference on the master record; NULL is ignored. */
static __inline void
nf_conntrack_get(struct nf_ct_info *nfct)
{
    if (nfct)
        atomic_inc(&nfct->master->use);
}
#endif


#endif /* skbuff */





struct sock;

/* Placeholder socket address type for this port: a single int member. */
typedef struct sockaddr
{
    int x;
} _sockaddr;


/* Message descriptor handed to the sendmsg/recvmsg protocol hooks below. */
struct msghdr {
    void            *msg_name;       /* Socket name */
    int             msg_namelen;     /* Length of name */
    struct iovec    *msg_iov;        /* Data blocks */
    __kernel_size_t msg_iovlen;      /* Number of blocks */
    void            *msg_control;    /* Per protocol magic (eg BSD file descriptor passing) */
    __kernel_size_t msg_controllen;  /* Length of cmsg list */
    unsigned        msg_flags;
};


/* IP protocol blocks we attach to sockets.
 * socket layer -> transport layer interface
 * transport -> network interface is defined by struct inet_proto
 */
struct proto {
    void            (*close)(struct sock *sk,
                             long timeout);
    int             (*connect)(struct sock *sk,
                               struct sockaddr *uaddr,
                               int addr_len);
    int             (*disconnect)(struct sock *sk, int flags);

    struct sock *   (*accept) (struct sock *sk, int flags, int *err);

    int             (*ioctl)(struct sock *sk, int cmd,
                             unsigned long arg);
    int             (*init)(struct sock *sk);
    int             (*destroy)(struct sock *sk);
    void            (*shutdown)(struct sock *sk, int how);
    int             (*setsockopt)(struct sock *sk, int level,
                                  int optname, char *optval, int optlen);
    int             (*getsockopt)(struct sock *sk, int level,
                                  int optname, char *optval,
                                  int *option);
    int             (*sendmsg)(struct sock *sk, struct msghdr *msg,
                               int len);
    int             (*recvmsg)(struct sock *sk, struct msghdr *msg,
                               int len, int noblock, int flags,
                               int *addr_len);
    int             (*bind)(struct sock *sk,
                            struct sockaddr *uaddr, int addr_len);

    int             (*backlog_rcv) (struct sock *sk,
                                    struct sk_buff *skb);

    /* Keeping track of sk's, looking them up, and port selection methods. */
    void            (*hash)(struct sock *sk);
    void            (*unhash)(struct sock *sk);
    int             (*get_port)(struct sock *sk, unsigned short snum);

    char            name[32];

    /* Fixed array of 32 counters; the padded NR_CPUS-sized form is kept
     * below as commented-out reference. */
    struct {
        int inuse;
    } stats[32];
//      u8  __pad[SMP_CACHE_BYTES - sizeof(int)];
//  } stats[NR_CPUS];
};







/* This defines a selective acknowledgement block. */
struct tcp_sack_block {
    __u32   start_seq;
    __u32   end_seq;
};


/* Per-connection TCP state; embedded in struct sock as tp_pinfo.af_tcp. */
struct tcp_opt {
    int     tcp_header_len; /* Bytes of tcp header to send */

/*
 *  Header prediction flags
 *  0x5?10 << 16 + snd_wnd in net byte order
 */
    __u32   pred_flags;

/*
 *  RFC793 variables by their proper names. This means you can
 *  read the code and the spec side by side (and laugh ...)
 *  See RFC793 and RFC1122. The RFC writes these in capitals.
*/
    __u32   rcv_nxt;        /* What we want to receive next */
    __u32   snd_nxt;        /* Next sequence we send */

    __u32   snd_una;        /* First byte we want an ack for */
    __u32   snd_sml;        /* Last byte of the most recently transmitted small packet */
    __u32   rcv_tstamp;     /* timestamp of last received ACK (for keepalives) */
    __u32   lsndtime;       /* timestamp of last sent data packet (for restart window) */

    /* Delayed ACK control data */
    struct {
        __u8    pending;        /* ACK is pending */
        __u8    quick;          /* Scheduled number of quick acks */
        __u8    pingpong;       /* The session is interactive */
        __u8    blocked;        /* Delayed ACK was blocked by socket lock */
        __u32   ato;            /* Predicted tick of soft clock */
        unsigned long timeout;  /* Currently scheduled timeout */
        __u32   lrcvtime;       /* timestamp of last received data packet */
        __u16   last_seg_size;  /* Size of last incoming segment */
        __u16   rcv_mss;        /* MSS used for delayed ACK decisions */
    } ack;

    /* Data for direct copy to user */
    struct {
        //struct sk_buff_head   prequeue;
        struct task_struct  *task;
        struct iovec        *iov;
        int                 memory;
        int                 len;
    } ucopy;

    __u32   snd_wl1;        /* Sequence for window update */
    __u32   snd_wnd;        /* The window we expect to receive */
    __u32   max_window;     /* Maximal window ever seen from peer */
    __u32   pmtu_cookie;    /* Last pmtu seen by socket */
    __u16   mss_cache;      /* Cached effective mss, not including SACKS */
    __u16   mss_clamp;      /* Maximal mss, negotiated at connection setup */
    __u16   ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
    __u8    ca_state;       /* State of fast-retransmit machine */
    __u8    retransmits;    /* Number of unrecovered RTO timeouts. */

    __u8    reordering;     /* Packet reordering metric. */
    __u8    queue_shrunk;   /* Write queue has been shrunk recently. */
    __u8    defer_accept;   /* User waits for some data after accept() */

/* RTT measurement */
    __u8    backoff;        /* backoff */
    __u32   srtt;           /* smoothed round trip time << 3 */
    __u32   mdev;           /* medium deviation */
    __u32   mdev_max;       /* maximal mdev for the last rtt period */
    __u32   rttvar;         /* smoothed mdev_max */
    __u32   rtt_seq;        /* sequence number to update rttvar */
    __u32   rto;            /* retransmit timeout */

    __u32   packets_out;    /* Packets which are "in flight" */
    __u32   left_out;       /* Packets which have left the network */
    __u32   retrans_out;    /* Retransmitted packets out */


/*
 *  Slow start and congestion control (see also Nagle, and Karn & Partridge)
 */
    __u32   snd_ssthresh;   /* Slow start size threshold */
    __u32   snd_cwnd;       /* Sending congestion window */
    __u16   snd_cwnd_cnt;   /* Linear increase counter */
    __u16   snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
    __u32   snd_cwnd_used;
    __u32   snd_cwnd_stamp;

    /* Two commonly used timers in both sender and receiver paths. */
    unsigned long       timeout;
    struct timer_list   retransmit_timer;   /* Resend (no ack) */
    struct timer_list   delack_timer;       /* Ack delay */

    struct sk_buff_head out_of_order_queue; /* Out of order segments go here */

    struct tcp_func     *af_specific;   /* Operations which are AF_INET{4,6} specific */
    struct sk_buff      *send_head;     /* Front of stuff to transmit */
    struct page         *sndmsg_page;   /* Cached page for sendmsg */
    u32                 sndmsg_off;     /* Cached offset for sendmsg */

    __u32   rcv_wnd;        /* Current receiver window */
    __u32   rcv_wup;        /* rcv_nxt on last window update sent */
    __u32   write_seq;      /* Tail(+1) of data held in tcp send buffer */
    __u32   pushed_seq;     /* Last pushed seq, required to talk to windows */
    __u32   copied_seq;     /* Head of yet unread data */
/*
 *      Options received (usually on last packet, some only on SYN packets).
 */
    char    tstamp_ok,      /* TIMESTAMP seen on SYN packet */
            wscale_ok,      /* Wscale seen on SYN packet */
            sack_ok;        /* SACK seen on SYN packet */
    char    saw_tstamp;     /* Saw TIMESTAMP on last packet */
    __u8    snd_wscale;     /* Window scaling received from sender */
    __u8    rcv_wscale;     /* Window scaling to send to receiver */
    __u8    nonagle;        /* Disable Nagle algorithm? */
    __u8    keepalive_probes; /* num of allowed keep alive probes */

/*  PAWS/RTTM data  */
    __u32   rcv_tsval;      /* Time stamp value */
    __u32   rcv_tsecr;      /* Time stamp echo reply */
    __u32   ts_recent;      /* Time stamp to echo next */
    long    ts_recent_stamp;/* Time we stored ts_recent (for aging) */

/*  SACKs data  */
    __u16   user_mss;       /* mss requested by user in ioctl */
    __u8    dsack;          /* D-SACK is scheduled */
    __u8    eff_sacks;      /* Size of SACK array to send with next packet */
    struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
    struct tcp_sack_block selective_acks[4]; /* The SACKS themselves */

    __u32   window_clamp;   /* Maximal window to advertise */
    __u32   rcv_ssthresh;   /* Current window clamp */
    __u8    probes_out;     /* unanswered 0 window probes */
    __u8    num_sacks;      /* Number of SACK blocks */
    __u16   advmss;         /* Advertised MSS */

    __u8    syn_retries;    /* num of allowed syn retries */
    __u8    ecn_flags;      /* ECN status bits. */
    __u16   prior_ssthresh; /* ssthresh saved at recovery start */
    __u32   lost_out;       /* Lost packets */
    __u32   sacked_out;     /* SACK'd packets */
    __u32   fackets_out;    /* FACK'd packets */
    __u32   high_seq;       /* snd_nxt at onset of congestion */

    __u32   retrans_stamp;  /* Timestamp of the last retransmit,
                             * also used in SYN-SENT to remember stamp of
                             * the first SYN. */
    __u32   undo_marker;    /* tracking retrans started here. */
    int     undo_retrans;   /* number of undoable retransmissions. */
    __u32   urg_seq;        /* Seq of received urgent pointer */
    __u16   urg_data;       /* Saved octet of OOB data and control flags */
    __u8    pending;        /* Scheduled timer event */
    __u8    urg_mode;       /* In urgent mode */
    __u32   snd_up;         /* Urgent pointer */

    /* The syn_wait_lock is necessary only to avoid tcp_get_info having
     * to grab the main lock sock while browsing the listening hash
     * (otherwise it's deadlock prone).
     * This lock is acquired in read mode only from tcp_get_info() and
     * it's acquired in write mode _only_ from code that is actively
     * changing the syn_wait_queue. All readers that are holding
     * the master sock lock don't need to grab this lock in read mode
     * too as the syn_wait_queue writes are always protected from
     * the main sock lock.
     */
    rwlock_t                syn_wait_lock;
    struct tcp_listen_opt   *listen_opt;

    /* FIFO of established children */
    struct open_request     *accept_queue;
    struct open_request     *accept_queue_tail;

    int                     write_pending;  /* A write to socket waits to start. */

    unsigned int            keepalive_time; /* time before keep alive takes place */
    unsigned int            keepalive_intvl; /* time interval between keep alive probes */
    int                     linger2;

    unsigned long           last_synq_overflow;
};




/* This is the per-socket lock.  The spinlock provides a synchronization
 * between user contexts and software interrupt processing, whereas the
 * mini-semaphore synchronizes multiple users amongst themselves.
 */
typedef struct {
    spinlock_t          slock;
    unsigned int        users;
    wait_queue_head_t   wq;
} socket_lock_t;

/* Socket state. The leading members (daddr through refcnt) are mirrored
 * field-for-field at the start of struct tcp_tw_bucket later in this
 * header, so their order and types must not change independently. */
struct sock {
    /* Socket demultiplex comparisons on incoming packets. */
    __u32           daddr;          /* Foreign IPv4 addr */
    __u32           rcv_saddr;      /* Bound local IPv4 addr */
    __u16           dport;          /* Destination port */
    unsigned short  num;            /* Local port */
    int             bound_dev_if;   /* Bound device index if != 0 */

    /* Main hash linkage for various protocol lookup tables.
*/
    struct sock             *next;
    struct sock             **pprev;
    struct sock             *bind_next;
    struct sock             **bind_pprev;

    volatile unsigned char  state,      /* Connection state */
                            zapped;     /* In ax25 & ipx means not linked */
    __u16                   sport;      /* Source port */

    unsigned short          family;     /* Address family */
    unsigned char           reuse;      /* SO_REUSEADDR setting */
    unsigned char           shutdown;
    atomic_t                refcnt;     /* Reference count */

    socket_lock_t           lock;       /* Synchronizer... */
    int                     rcvbuf;     /* Size of receive buffer in bytes */

    wait_queue_head_t       *sleep;     /* Sock wait queue */
    struct dst_entry        *dst_cache; /* Destination cache */
    rwlock_t                dst_lock;
    atomic_t                rmem_alloc;     /* Receive queue bytes committed */
    struct sk_buff_head     receive_queue;  /* Incoming packets */
    atomic_t                wmem_alloc;     /* Transmit queue bytes committed */
    struct sk_buff_head     write_queue;    /* Packet sending queue */
    atomic_t                omem_alloc;     /* "o" is "option" or "other" */
    int                     wmem_queued;    /* Persistent queue size */
    int                     forward_alloc;  /* Space allocated forward. */
    __u32                   saddr;          /* Sending source */
    unsigned int            allocation;     /* Allocation mode */
    int                     sndbuf;         /* Size of send buffer in bytes */
    struct sock             *prev;

    /* Not all are volatile, but some are, so we might as well say they all are.
     * XXX Make this a flag word -DaveM
     */
    volatile char           dead,
                            done,
                            urginline,
                            keepopen,
                            linger,
                            destroy,
                            no_check,
                            broadcast,
                            bsdism;
    unsigned char           debug;
    unsigned char           rcvtstamp;
    unsigned char           use_write_queue;
    unsigned char           userlocks;
    /* Hole of 3 bytes. Try to pack. */
    int                     route_caps;
    int                     proc;
    unsigned long           lingertime;

    int                     hashent;
    struct sock             *pair;

    /* The backlog queue is special, it is always used with
     * the per-socket spinlock held and requires low latency
     * access.  Therefore we special case its implementation.
     */
    struct {
        struct sk_buff *head;
        struct sk_buff *tail;
    } backlog;

    rwlock_t                callback_lock;

    /* Error queue, rarely used. */
    struct sk_buff_head     error_queue;

    struct proto            *prot;

#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
    union {
        struct ipv6_pinfo   af_inet6;
    } net_pinfo;
#endif

    /* Transport-protocol private area; af_tcp is always present, the
     * other members depend on the configuration macros. */
    union {
        struct tcp_opt      af_tcp;
#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
        struct raw_opt      tp_raw4;
#endif
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
        struct raw6_opt     tp_raw;
#endif /* CONFIG_IPV6 */
#if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE)
        struct spx_opt      af_spx;
#endif /* CONFIG_SPX */

    } tp_pinfo;

    int                     err, err_soft;  /* Soft holds errors that don't
                                               cause failure but are the cause
                                               of a persistent failure not just
                                               'timed out' */
    unsigned short          ack_backlog;
    unsigned short          max_ack_backlog;
    __u32                   priority;
    unsigned short          type;
    unsigned char           localroute;     /* Route locally only */
    unsigned char           protocol;
//  struct ucred            peercred;
    int                     rcvlowat;
    long                    rcvtimeo;
    long                    sndtimeo;

#ifdef CONFIG_FILTER
    /* Socket Filtering Instructions */
    struct sk_filter        *filter;
#endif /* CONFIG_FILTER */

    /* This is where all the private (optional) areas that don't
     * overlap will eventually live.
*/
    /* Per-address-family private data. Only destruct_hook and af_netlink
     * are unconditional; every other member is compiled in only when the
     * matching CONFIG_* (or CONFIG_*_MODULE) macro is defined. */
    union {
        void *destruct_hook;
//      struct unix_opt af_unix;
#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
        struct inet_opt af_inet;
#endif
#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
        struct atalk_sock   af_at;
#endif
#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
        struct ipx_opt      af_ipx;
#endif
#if defined (CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
        struct dn_scp       dn;
#endif
#if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE)
        struct packet_opt   *af_packet;
#endif
#if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
        x25_cb              *x25;
#endif
#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
        ax25_cb             *ax25;
#endif
#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
        nr_cb               *nr;
#endif
#if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
        rose_cb             *rose;
#endif
#if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
        struct pppox_opt    *pppox;
#endif
        struct netlink_opt  *af_netlink;
#if defined(CONFIG_ECONET) || defined(CONFIG_ECONET_MODULE)
        struct econet_opt   *af_econet;
#endif
#if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
        struct atm_vcc      *af_atm;
#endif
#if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
        struct irda_sock    *irda;
#endif
#if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE)
        struct wanpipe_opt  *af_wanpipe;
#endif
    } protinfo;


    /* This part is used for the timeout functions. */
    struct timer_list       timer;      /* This is the sock cleanup timer.
*/ 1630 struct timeval stamp; 1631 1632 /* Identd and reporting IO signals */ 1633 struct socket *socket; 1634 1635 /* RPC layer private data */ 1636 void *user_data; 1637 1638 /* Callbacks */ 1639 void (*state_change)(struct sock *sk); 1640 void (*data_ready)(struct sock *sk,int bytes); 1641 void (*write_space)(struct sock *sk); 1642 void (*error_report)(struct sock *sk); 1643 1644 int (*backlog_rcv) (struct sock *sk, 1645 struct sk_buff *skb); 1646 void (*destruct)(struct sock *sk); 1647 }; 1648 1649 1650 1651 1652 #if 1 /* dst (_NET_DST_H) */ 1653 1654 #if 0 1655 #include <linux/config.h> 1656 #include <net/neighbour.h> 1657 #endif 1658 1659 /* 1660 * 0 - no debugging messages 1661 * 1 - rare events and bugs (default) 1662 * 2 - trace mode. 1663 */ 1664 #define RT_CACHE_DEBUG 0 1665 1666 #define DST_GC_MIN (1*HZ) 1667 #define DST_GC_INC (5*HZ) 1668 #define DST_GC_MAX (120*HZ) 1669 1670 struct sk_buff; 1671 1672 struct dst_entry 1673 { 1674 struct dst_entry *next; 1675 atomic_t __refcnt; /* client references */ 1676 int __use; 1677 struct net_device *dev; 1678 int obsolete; 1679 int flags; 1680 #define DST_HOST 1 1681 unsigned long lastuse; 1682 unsigned long expires; 1683 1684 unsigned mxlock; 1685 unsigned pmtu; 1686 unsigned window; 1687 unsigned rtt; 1688 unsigned rttvar; 1689 unsigned ssthresh; 1690 unsigned cwnd; 1691 unsigned advmss; 1692 unsigned reordering; 1693 1694 unsigned long rate_last; /* rate limiting for ICMP */ 1695 unsigned long rate_tokens; 1696 1697 int error; 1698 1699 struct neighbour *neighbour; 1700 struct hh_cache *hh; 1701 1702 int (*input)(struct sk_buff*); 1703 int (*output)(struct sk_buff*); 1704 1705 #ifdef CONFIG_NET_CLS_ROUTE 1706 __u32 tclassid; 1707 #endif 1708 1709 struct dst_ops *ops; 1710 1711 char info[0]; 1712 }; 1713 1714 1715 struct dst_ops 1716 { 1717 unsigned short family; 1718 unsigned short protocol; 1719 unsigned gc_thresh; 1720 1721 int (*gc)(void); 1722 struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); 
1723 struct dst_entry * (*reroute)(struct dst_entry *, 1724 struct sk_buff *); 1725 void (*destroy)(struct dst_entry *); 1726 struct dst_entry * (*negative_advice)(struct dst_entry *); 1727 void (*link_failure)(struct sk_buff *); 1728 int entry_size; 1729 1730 atomic_t entries; 1731 kmem_cache_t *kmem_cachep; 1732 }; 1733 1734 #ifdef __KERNEL__ 1735 1736 static __inline void dst_hold(struct dst_entry * dst) 1737 { 1738 atomic_inc(&dst->__refcnt); 1739 } 1740 1741 static __inline 1742 struct dst_entry * dst_clone(struct dst_entry * dst) 1743 { 1744 if (dst) 1745 atomic_inc(&dst->__refcnt); 1746 return dst; 1747 } 1748 1749 static __inline 1750 void dst_release(struct dst_entry * dst) 1751 { 1752 if (dst) 1753 atomic_dec(&dst->__refcnt); 1754 } 1755 1756 extern void * dst_alloc(struct dst_ops * ops); 1757 extern void __dst_free(struct dst_entry * dst); 1758 extern void dst_destroy(struct dst_entry * dst); 1759 1760 static __inline 1761 void dst_free(struct dst_entry * dst) 1762 { 1763 if (dst->obsolete > 1) 1764 return; 1765 if (!atomic_read(&dst->__refcnt)) { 1766 dst_destroy(dst); 1767 return; 1768 } 1769 __dst_free(dst); 1770 } 1771 1772 static __inline void dst_confirm(struct dst_entry *dst) 1773 { 1774 if (dst) 1775 neigh_confirm(dst->neighbour); 1776 } 1777 1778 static __inline void dst_negative_advice(struct dst_entry **dst_p) 1779 { 1780 struct dst_entry * dst = *dst_p; 1781 if (dst && dst->ops->negative_advice) 1782 *dst_p = dst->ops->negative_advice(dst); 1783 } 1784 1785 static __inline void dst_link_failure(struct sk_buff *skb) 1786 { 1787 struct dst_entry * dst = skb->dst; 1788 if (dst && dst->ops && dst->ops->link_failure) 1789 dst->ops->link_failure(skb); 1790 } 1791 1792 static __inline void dst_set_expires(struct dst_entry *dst, int timeout) 1793 { 1794 unsigned long expires = jiffies + timeout; 1795 1796 if (expires == 0) 1797 expires = 1; 1798 1799 if (dst->expires == 0 || (long)(dst->expires - expires) > 0) 1800 dst->expires = expires; 1801 } 1802 
extern void dst_init(void);

#endif /* dst */



#if 1
/* dummy types */


#endif

/* Debug switches for the TCP code; both are defined to 1 here. */
#define TCP_DEBUG 1
#define FASTRETRANS_DEBUG 1

/* Cancel timers, when they are not required. */
#undef TCP_CLEAR_TIMERS

/* The original Linux includes are kept for reference but compiled out;
 * this port pulls its definitions from "linux.h" instead. */
#if 0
#include <linux/config.h>
#include <linux/tcp.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <net/checksum.h>
#include <net/sock.h>
#else
#include "linux.h"
#endif

/* This is for all connections with a full identity, no wildcards.
 * New scheme, half the table is for TIME_WAIT, the other half is
 * for the rest.  I'll experiment with dynamic table growth later.
 */
struct tcp_ehash_bucket {
    rwlock_t    lock;
    struct sock *chain;
} __attribute__((__aligned__(8)));

/* This is for listening sockets, thus all sockets which possess wildcards. */
#define TCP_LHTABLE_SIZE    32  /* Yes, really, this is all you need. */

/* There are a few simple rules, which allow for local port reuse by
 * an application.  In essence:
 *
 *  1) Sockets bound to different interfaces may share a local port.
 *     Failing that, goto test 2.
 *  2) If all sockets have sk->reuse set, and none of them are in
 *     TCP_LISTEN state, the port may be shared.
 *     Failing that, goto test 3.
 *  3) If all sockets are bound to a specific sk->rcv_saddr local
 *     address, and none of them are the same, the port may be
 *     shared.
 *     Failing this, the port cannot be shared.
 *
 * The interesting point, is test #2.  This is what an FTP server does
 * all day.  To optimize this case we use a specific flag bit defined
 * below.  As we add sockets to a bind bucket list, we perform a
 * check of: (newsk->reuse && (newsk->state != TCP_LISTEN))
 * As long as all sockets added to a bind bucket pass this test,
 * the flag bit will be set.
* The resulting situation is that tcp_v[46]_verify_bind() can just check
 * for this flag bit, if it is set and the socket trying to bind has
 * sk->reuse set, we don't even have to walk the owners list at all,
 * we return that it is ok to bind this socket to the requested local port.
 *
 * Sounds like a lot of work, but it is worth it.  In a more naive
 * implementation (ie. current FreeBSD etc.) the entire list of ports
 * must be walked for each data port opened by an ftp server.  Needless
 * to say, this does not scale at all.  With a couple thousand FTP
 * users logged onto your box, isn't it nice to know that new data
 * ports are created in O(1) time?  I thought so. ;-)   -DaveM
 */
struct tcp_bind_bucket {
    unsigned short          port;
    signed short            fastreuse;
    struct tcp_bind_bucket  *next;
    struct sock             *owners;
    struct tcp_bind_bucket  **pprev;
};

/* One slot of the bind hash: a lock plus the chain of buckets in it. */
struct tcp_bind_hashbucket {
    spinlock_t              lock;
    struct tcp_bind_bucket  *chain;
};

/* All TCP lookup tables, declared as the single global tcp_hashinfo. */
extern struct tcp_hashinfo {
    /* This is for sockets with full identity only.  Sockets here will
     * always be without wildcards and will have the following invariant:
     *
     *          TCP_ESTABLISHED <= sk->state < TCP_CLOSE
     *
     * First half of the table is for sockets not in TIME_WAIT, second half
     * is for TIME_WAIT sockets only.
     */
    struct tcp_ehash_bucket *__tcp_ehash;

    /* Ok, let's try this, I give up, we do need a local binding
     * TCP hash as well as the others for fast bind/connect.
     */
    struct tcp_bind_hashbucket *__tcp_bhash;

    int __tcp_bhash_size;
    int __tcp_ehash_size;

    /* All sockets in TCP_LISTEN state will be in here.  This is the only
     * table where wildcard'd TCP sockets can exist.  Hash function here
     * is just local port number.
     */
    struct sock *__tcp_listening_hash[TCP_LHTABLE_SIZE];

    /* All the above members are written once at bootup and
     * never written again _or_ are predominantly read-access.
     *
     * Now align to a new cache line as all the following members
     * are often dirty.
     */
    rwlock_t __tcp_lhash_lock ____cacheline_aligned;
    atomic_t __tcp_lhash_users;
    wait_queue_head_t __tcp_lhash_wait;
    spinlock_t __tcp_portalloc_lock;
} tcp_hashinfo;

/* Short names for the members of the global tcp_hashinfo. */
#define tcp_ehash   (tcp_hashinfo.__tcp_ehash)
#define tcp_bhash   (tcp_hashinfo.__tcp_bhash)
#define tcp_ehash_size  (tcp_hashinfo.__tcp_ehash_size)
#define tcp_bhash_size  (tcp_hashinfo.__tcp_bhash_size)
#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash)
#define tcp_lhash_lock  (tcp_hashinfo.__tcp_lhash_lock)
#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users)
#define tcp_lhash_wait  (tcp_hashinfo.__tcp_lhash_wait)
#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)

extern kmem_cache_t *tcp_bucket_cachep;
extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
                                                 unsigned short snum);
extern void tcp_bucket_unlock(struct sock *sk);
extern int tcp_port_rover;
extern struct sock *tcp_v4_lookup_listener(u32 addr, unsigned short hnum, int dif);

/* These are AF independent. */
/* Masks the port with (tcp_bhash_size - 1); this is a valid bucket index
 * only if tcp_bhash_size is a power of two -- NOTE(review): confirm where
 * the size is set. */
static __inline int tcp_bhashfn(__u16 lport)
{
    return (lport & (tcp_bhash_size - 1));
}

/* This is a TIME_WAIT bucket.  It works around the memory consumption
 * problems of sockets in such a state on heavily loaded servers, but
 * without violating the protocol specification.
 */
struct tcp_tw_bucket {
    /* These _must_ match the beginning of struct sock precisely.
     * XXX Yes I know this is gross, but I'd have to edit every single
     * XXX networking file if I created a "struct sock_header". -DaveM
     */
    __u32                   daddr;
    __u32                   rcv_saddr;
    __u16                   dport;
    unsigned short          num;
    int                     bound_dev_if;
    struct sock             *next;
    struct sock             **pprev;
    struct sock             *bind_next;
    struct sock             **bind_pprev;
    unsigned char           state,
                            substate; /* "zapped" is replaced with "substate" */
    __u16                   sport;
    unsigned short          family;
    unsigned char           reuse,
                            rcv_wscale; /* It is also TW bucket specific */
    atomic_t                refcnt;

    /* And these are ours. */
    int                     hashent;
    int                     timeout;
    __u32                   rcv_nxt;
    __u32                   snd_nxt;
    __u32                   rcv_wnd;
    __u32                   ts_recent;
    long                    ts_recent_stamp;
    unsigned long           ttd;
    struct tcp_bind_bucket  *tb;
    struct tcp_tw_bucket    *next_death;
    struct tcp_tw_bucket    **pprev_death;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
    struct in6_addr         v6_daddr;
    struct in6_addr         v6_rcv_saddr;
#endif
};

extern kmem_cache_t *tcp_timewait_cachep;

/* Drop one reference on a TIME_WAIT bucket; when atomic_dec_and_test()
 * returns non-zero the bucket is returned to tcp_timewait_cachep. */
static __inline void tcp_tw_put(struct tcp_tw_bucket *tw)
{
    if (atomic_dec_and_test(&tw->refcnt)) {
#ifdef INET_REFCNT_DEBUG
        printk(KERN_DEBUG "tw_bucket %p released\n", tw);
#endif
        kmem_cache_free(tcp_timewait_cachep, tw);
    }
}

extern atomic_t tcp_orphan_count;
extern int tcp_tw_count;
extern void tcp_time_wait(struct sock *sk, int state, int timeo);
extern void tcp_timewait_kill(struct tcp_tw_bucket *tw);
extern void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo);
extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);


/* Socket demux engine toys.
*/
/* Pack two 16-bit ports into one 32-bit value. The halves are swapped
 * between the big- and little-endian variants; the result is compared
 * against a 32-bit load starting at (__sk)->dport in the match macros
 * below. */
#ifdef __BIG_ENDIAN
#define TCP_COMBINED_PORTS(__sport, __dport) \
    (((__u32)(__sport)<<16) | (__u32)(__dport))
#else /* __LITTLE_ENDIAN */
#define TCP_COMBINED_PORTS(__sport, __dport) \
    (((__u32)(__dport)<<16) | (__u32)(__sport))
#endif

#if (BITS_PER_LONG == 64)
/* 64-bit builds: TCP_V4_ADDR_COOKIE declares a __u64 variable holding both
 * addresses. The expansion already ends in ';', so call sites must not add
 * one (the 32-bit variant expands to nothing). */
#ifdef __BIG_ENDIAN
#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
    __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));
#else /* __LITTLE_ENDIAN */
#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
    __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
#endif /* __BIG_ENDIAN */
/* Compares the cookie with one __u64 load starting at (__sk)->daddr and the
 * ports with one __u32 load starting at (__sk)->dport. This relies on
 * daddr/rcv_saddr and dport/num being adjacent members, as they are
 * declared in struct sock and struct tcp_tw_bucket. */
#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
    (((*((__u64 *)&((__sk)->daddr)))== (__cookie))  &&      \
     ((*((__u32 *)&((__sk)->dport)))== (__ports))   &&      \
     (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
#else /* 32-bit arch */
/* 32-bit builds: no cookie variable is declared and __cookie is unused;
 * the addresses are compared member by member instead. */
#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
    (((__sk)->daddr                 == (__saddr))   &&      \
     ((__sk)->rcv_saddr             == (__daddr))   &&      \
     ((*((__u32 *)&((__sk)->dport)))== (__ports))   &&      \
     (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
#endif /* 64-bit arch */

/* IPv6 counterpart: same port and device tests, plus a family check and
 * ipv6_addr_cmp() on the addresses stored in net_pinfo.af_inet6. */
#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif)      \
    (((*((__u32 *)&((__sk)->dport)))== (__ports))                   && \
     ((__sk)->family                == AF_INET6)                    && \
     !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.daddr, (__saddr))   && \
     !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.rcv_saddr, (__daddr)) && \
     (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))

/* These can have wildcards, don't try too hard.
*/
/* Listening-hash bucket for a local port. The masking implementation is
 * compiled out (#if 0) in this port, so every port currently maps to
 * bucket 0. */
static __inline int tcp_lhashfn(unsigned short num)
{
#if 0
    return num & (TCP_LHTABLE_SIZE - 1);
#else
    return 0;
#endif
}

/* Listening-hash bucket for a socket. Also stubbed in this port: the call
 * to tcp_lhashfn(sk->num) is compiled out and 0 is always returned. */
static __inline int tcp_sk_listen_hashfn(struct sock *sk)
{
#if 0
    return tcp_lhashfn(sk->num);
#else
    return 0;
#endif
}

#define MAX_TCP_HEADER  (128 + MAX_HEADER)

/*
 * Never offer a window over 32767 without using window scaling. Some
 * poor stacks do signed 16bit maths!
 */
#define MAX_TCP_WINDOW      32767U

/* Minimal accepted MSS. It is (60+60+8) - (20+20). */
#define TCP_MIN_MSS     88U

/* Minimal RCV_MSS. */
#define TCP_MIN_RCVMSS      536U

/* After receiving this amount of duplicate ACKs fast retransmit starts. */
#define TCP_FASTRETRANS_THRESH 3

/* Maximal reordering. */
#define TCP_MAX_REORDERING  127

/* Maximal number of ACKs sent quickly to accelerate slow-start. */
#define TCP_MAX_QUICKACKS   16U

/* urg_data states */
#define TCP_URG_VALID   0x0100
#define TCP_URG_NOTYET  0x0200
#define TCP_URG_READ    0x0400

#define TCP_RETR1   3   /*
                         * This is how many retries it does before it
                         * tries to figure out if the gateway is
                         * down. Minimal RFC value is 3; it corresponds
                         * to ~3sec-8min depending on RTO.
                         */

#define TCP_RETR2   15  /*
                         * This should take at least
                         * 90 minutes to time out.
                         * RFC1122 says that the limit is 100 sec.
                         * 15 is ~13-30min depending on RTO.
                         */

#define TCP_SYN_RETRIES  5  /* number of times to retry active opening a
                             * connection: ~180sec is RFC minimum */

#define TCP_SYNACK_RETRIES 5    /* number of times to retry passive opening a
                                 * connection: ~180sec is RFC minimum */


#define TCP_ORPHAN_RETRIES 7    /* number of times to retry on an orphaned
                                 * socket. 7 is ~50sec-16min.
2120 */ 2121 2122 2123 #define TCP_TIMEWAIT_LEN (60*1000) 2124 //#define TCP_TIMEWAIT_LEN (60*HZ) 2125 /* how long to wait to destroy TIME-WAIT 2126 * state, about 60 seconds */ 2127 #define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN 2128 /* BSD style FIN_WAIT2 deadlock breaker. 2129 * It used to be 3min, new value is 60sec, 2130 * to combine FIN-WAIT-2 timeout with 2131 * TIME-WAIT timer. 2132 */ 2133 2134 #define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */ 2135 #if HZ >= 100 2136 #define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */ 2137 #define TCP_ATO_MIN ((unsigned)(HZ/25)) 2138 #else 2139 #define TCP_DELACK_MIN 4U 2140 #define TCP_ATO_MIN 4U 2141 #endif 2142 #define TCP_RTO_MAX ((unsigned)(120*HZ)) 2143 #define TCP_RTO_MIN ((unsigned)(HZ/5)) 2144 #define TCP_TIMEOUT_INIT ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value */ 2145 2146 #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes 2147 * for local resources. 2148 */ 2149 2150 #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */ 2151 #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ 2152 #define TCP_KEEPALIVE_INTVL (75*HZ) 2153 2154 #define MAX_TCP_KEEPIDLE 32767 2155 #define MAX_TCP_KEEPINTVL 32767 2156 #define MAX_TCP_KEEPCNT 127 2157 #define MAX_TCP_SYNCNT 127 2158 2159 /* TIME_WAIT reaping mechanism. */ 2160 #define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ 2161 #define TCP_TWKILL_PERIOD (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS) 2162 2163 #define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */ 2164 #define TCP_SYNQ_HSIZE 512 /* Size of SYNACK hash table */ 2165 2166 #define TCP_PAWS_24DAYS (60 * 60 * 24 * 24) 2167 #define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated 2168 * after this time. It should be equal 2169 * (or greater than) TCP_TIMEWAIT_LEN 2170 * to provide reliability equal to one 2171 * provided by timewait state. 
2172 */ 2173 #define TCP_PAWS_WINDOW 1 /* Replay window for per-host 2174 * timestamps. It must be less than 2175 * minimal timewait lifetime. 2176 */ 2177 2178 #define TCP_TW_RECYCLE_SLOTS_LOG 5 2179 #define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG) 2180 2181 /* If time > 4sec, it is "slow" path, no recycling is required, 2182 so that we select tick to get range about 4 seconds. 2183 */ 2184 2185 #if 0 2186 #if HZ <= 16 || HZ > 4096 2187 # error Unsupported: HZ <= 16 or HZ > 4096 2188 #elif HZ <= 32 2189 # define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG) 2190 #elif HZ <= 64 2191 # define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG) 2192 #elif HZ <= 128 2193 # define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG) 2194 #elif HZ <= 256 2195 # define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG) 2196 #elif HZ <= 512 2197 # define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG) 2198 #elif HZ <= 1024 2199 # define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG) 2200 #elif HZ <= 2048 2201 # define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG) 2202 #else 2203 # define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG) 2204 #endif 2205 #else 2206 #define TCP_TW_RECYCLE_TICK (0) 2207 #endif 2208 2209 /* 2210 * TCP option 2211 */ 2212 2213 #define TCPOPT_NOP 1 /* Padding */ 2214 #define TCPOPT_EOL 0 /* End of options */ 2215 #define TCPOPT_MSS 2 /* Segment size negotiating */ 2216 #define TCPOPT_WINDOW 3 /* Window scaling */ 2217 #define TCPOPT_SACK_PERM 4 /* SACK Permitted */ 2218 #define TCPOPT_SACK 5 /* SACK Block */ 2219 #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ 2220 2221 /* 2222 * TCP option lengths 2223 */ 2224 2225 #define TCPOLEN_MSS 4 2226 #define TCPOLEN_WINDOW 3 2227 #define TCPOLEN_SACK_PERM 2 2228 #define TCPOLEN_TIMESTAMP 10 2229 2230 /* But this is what stacks really send out. 
*/ 2231 #define TCPOLEN_TSTAMP_ALIGNED 12 2232 #define TCPOLEN_WSCALE_ALIGNED 4 2233 #define TCPOLEN_SACKPERM_ALIGNED 4 2234 #define TCPOLEN_SACK_BASE 2 2235 #define TCPOLEN_SACK_BASE_ALIGNED 4 2236 #define TCPOLEN_SACK_PERBLOCK 8 2237 2238 #define TCP_TIME_RETRANS 1 /* Retransmit timer */ 2239 #define TCP_TIME_DACK 2 /* Delayed ack timer */ 2240 #define TCP_TIME_PROBE0 3 /* Zero window probe timer */ 2241 #define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */ 2242 2243 #if 0 2244 /* sysctl variables for tcp */ 2245 extern int sysctl_max_syn_backlog; 2246 extern int sysctl_tcp_timestamps; 2247 extern int sysctl_tcp_window_scaling; 2248 extern int sysctl_tcp_sack; 2249 extern int sysctl_tcp_fin_timeout; 2250 extern int sysctl_tcp_tw_recycle; 2251 extern int sysctl_tcp_keepalive_time; 2252 extern int sysctl_tcp_keepalive_probes; 2253 extern int sysctl_tcp_keepalive_intvl; 2254 extern int sysctl_tcp_syn_retries; 2255 extern int sysctl_tcp_synack_retries; 2256 extern int sysctl_tcp_retries1; 2257 extern int sysctl_tcp_retries2; 2258 extern int sysctl_tcp_orphan_retries; 2259 extern int sysctl_tcp_syncookies; 2260 extern int sysctl_tcp_retrans_collapse; 2261 extern int sysctl_tcp_stdurg; 2262 extern int sysctl_tcp_rfc1337; 2263 extern int sysctl_tcp_abort_on_overflow; 2264 extern int sysctl_tcp_max_orphans; 2265 extern int sysctl_tcp_max_tw_buckets; 2266 extern int sysctl_tcp_fack; 2267 extern int sysctl_tcp_reordering; 2268 extern int sysctl_tcp_ecn; 2269 extern int sysctl_tcp_dsack; 2270 extern int sysctl_tcp_mem[3]; 2271 extern int sysctl_tcp_wmem[3]; 2272 extern int sysctl_tcp_rmem[3]; 2273 extern int sysctl_tcp_app_win; 2274 extern int sysctl_tcp_adv_win_scale; 2275 extern int sysctl_tcp_tw_reuse; 2276 #endif 2277 2278 extern atomic_t tcp_memory_allocated; 2279 extern atomic_t tcp_sockets_allocated; 2280 extern int tcp_memory_pressure; 2281 2282 struct open_request; 2283 2284 struct or_calltable { 2285 int family; 2286 int (*rtx_syn_ack) (struct sock *sk, struct 
open_request *req, struct dst_entry*); 2287 void (*send_ack) (struct sk_buff *skb, struct open_request *req); 2288 void (*destructor) (struct open_request *req); 2289 void (*send_reset) (struct sk_buff *skb); 2290 }; 2291 2292 struct tcp_v4_open_req { 2293 __u32 loc_addr; 2294 __u32 rmt_addr; 2295 struct ip_options *opt; 2296 }; 2297 2298 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 2299 struct tcp_v6_open_req { 2300 struct in6_addr loc_addr; 2301 struct in6_addr rmt_addr; 2302 struct sk_buff *pktopts; 2303 int iif; 2304 }; 2305 #endif 2306 2307 /* this structure is too big */ 2308 struct open_request { 2309 struct open_request *dl_next; /* Must be first member! */ 2310 __u32 rcv_isn; 2311 __u32 snt_isn; 2312 __u16 rmt_port; 2313 __u16 mss; 2314 __u8 retrans; 2315 __u8 __pad; 2316 __u16 snd_wscale : 4, 2317 rcv_wscale : 4, 2318 tstamp_ok : 1, 2319 sack_ok : 1, 2320 wscale_ok : 1, 2321 ecn_ok : 1, 2322 acked : 1; 2323 /* The following two fields can be easily recomputed I think -AK */ 2324 __u32 window_clamp; /* window clamp at creation time */ 2325 __u32 rcv_wnd; /* rcv_wnd offered first time */ 2326 __u32 ts_recent; 2327 unsigned long expires; 2328 struct or_calltable *class; 2329 struct sock *sk; 2330 union { 2331 struct tcp_v4_open_req v4_req; 2332 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 2333 struct tcp_v6_open_req v6_req; 2334 #endif 2335 } af; 2336 }; 2337 2338 /* SLAB cache for open requests. 
*/ 2339 extern kmem_cache_t *tcp_openreq_cachep; 2340 2341 #define tcp_openreq_alloc() kmem_cache_alloc(tcp_openreq_cachep, SLAB_ATOMIC) 2342 #define tcp_openreq_fastfree(req) kmem_cache_free(tcp_openreq_cachep, req) 2343 2344 static __inline void tcp_openreq_free(struct open_request *req) 2345 { 2346 req->class->destructor(req); 2347 tcp_openreq_fastfree(req); 2348 } 2349 2350 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 2351 #define TCP_INET_FAMILY(fam) ((fam) == AF_INET) 2352 #else 2353 #define TCP_INET_FAMILY(fam) 1 2354 #endif 2355 2356 /* 2357 * Pointers to address related TCP functions 2358 * (i.e. things that depend on the address family) 2359 * 2360 * BUGGG_FUTURE: all the idea behind this struct is wrong. 2361 * It mixes socket frontend with transport function. 2362 * With port sharing between IPv6/v4 it gives the only advantage, 2363 * only poor IPv6 needs to permanently recheck, that it 2364 * is still IPv6 8)8) It must be cleaned up as soon as possible. 2365 * --ANK (980802) 2366 */ 2367 2368 struct tcp_func { 2369 int (*queue_xmit) (struct sk_buff *skb); 2370 2371 void (*send_check) (struct sock *sk, 2372 struct tcphdr *th, 2373 int len, 2374 struct sk_buff *skb); 2375 2376 int (*rebuild_header) (struct sock *sk); 2377 2378 int (*conn_request) (struct sock *sk, 2379 struct sk_buff *skb); 2380 2381 struct sock * (*syn_recv_sock) (struct sock *sk, 2382 struct sk_buff *skb, 2383 struct open_request *req, 2384 struct dst_entry *dst); 2385 2386 int (*remember_stamp) (struct sock *sk); 2387 2388 __u16 net_header_len; 2389 2390 int (*setsockopt) (struct sock *sk, 2391 int level, 2392 int optname, 2393 char *optval, 2394 int optlen); 2395 2396 int (*getsockopt) (struct sock *sk, 2397 int level, 2398 int optname, 2399 char *optval, 2400 int *optlen); 2401 2402 2403 void (*addr2sockaddr) (struct sock *sk, 2404 struct sockaddr *); 2405 2406 int sockaddr_len; 2407 }; 2408 2409 /* 2410 * The next routines deal with comparing 32 bit unsigned ints 2411 * 
and worry about wraparound (automatic with unsigned arithmetic). 2412 */ 2413 2414 extern __inline int before(__u32 seq1, __u32 seq2) 2415 { 2416 return (__s32)(seq1-seq2) < 0; 2417 } 2418 2419 extern __inline int after(__u32 seq1, __u32 seq2) 2420 { 2421 return (__s32)(seq2-seq1) < 0; 2422 } 2423 2424 2425 /* is s2<=s1<=s3 ? */ 2426 extern __inline int between(__u32 seq1, __u32 seq2, __u32 seq3) 2427 { 2428 return seq3 - seq2 >= seq1 - seq2; 2429 } 2430 2431 2432 extern struct proto tcp_prot; 2433 2434 #ifdef ROS_STATISTICS 2435 extern struct tcp_mib tcp_statistics[NR_CPUS*2]; 2436 2437 #define TCP_INC_STATS(field) SNMP_INC_STATS(tcp_statistics, field) 2438 #define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(tcp_statistics, field) 2439 #define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(tcp_statistics, field) 2440 #endif 2441 2442 extern void tcp_put_port(struct sock *sk); 2443 extern void __tcp_put_port(struct sock *sk); 2444 extern void tcp_inherit_port(struct sock *sk, struct sock *child); 2445 2446 extern void tcp_v4_err(struct sk_buff *skb, u32); 2447 2448 extern void tcp_shutdown (struct sock *sk, int how); 2449 2450 extern int tcp_v4_rcv(struct sk_buff *skb); 2451 2452 extern int tcp_v4_remember_stamp(struct sock *sk); 2453 2454 extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); 2455 2456 extern int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size); 2457 extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); 2458 2459 extern int tcp_ioctl(struct sock *sk, 2460 int cmd, 2461 unsigned long arg); 2462 2463 extern int tcp_rcv_state_process(struct sock *sk, 2464 struct sk_buff *skb, 2465 struct tcphdr *th, 2466 unsigned len); 2467 2468 extern int tcp_rcv_established(struct sock *sk, 2469 struct sk_buff *skb, 2470 struct tcphdr *th, 2471 unsigned len); 2472 2473 enum tcp_ack_state_t 2474 { 2475 TCP_ACK_SCHED = 1, 2476 TCP_ACK_TIMER = 2, 2477 TCP_ACK_PUSHED= 4 2478 }; 2479 2480 static __inline 
void tcp_schedule_ack(struct tcp_opt *tp) 2481 { 2482 tp->ack.pending |= TCP_ACK_SCHED; 2483 } 2484 2485 static __inline int tcp_ack_scheduled(struct tcp_opt *tp) 2486 { 2487 return tp->ack.pending&TCP_ACK_SCHED; 2488 } 2489 2490 static __inline void tcp_dec_quickack_mode(struct tcp_opt *tp) 2491 { 2492 if (tp->ack.quick && --tp->ack.quick == 0) { 2493 /* Leaving quickack mode we deflate ATO. */ 2494 tp->ack.ato = TCP_ATO_MIN; 2495 } 2496 } 2497 2498 extern void tcp_enter_quickack_mode(struct tcp_opt *tp); 2499 2500 static __inline void tcp_delack_init(struct tcp_opt *tp) 2501 { 2502 memset(&tp->ack, 0, sizeof(tp->ack)); 2503 } 2504 2505 static __inline void tcp_clear_options(struct tcp_opt *tp) 2506 { 2507 tp->tstamp_ok = tp->sack_ok = tp->wscale_ok = tp->snd_wscale = 0; 2508 } 2509 2510 enum tcp_tw_status 2511 { 2512 TCP_TW_SUCCESS = 0, 2513 TCP_TW_RST = 1, 2514 TCP_TW_ACK = 2, 2515 TCP_TW_SYN = 3 2516 }; 2517 2518 2519 extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, 2520 struct sk_buff *skb, 2521 struct tcphdr *th, 2522 unsigned len); 2523 2524 extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, 2525 struct open_request *req, 2526 struct open_request **prev); 2527 extern int tcp_child_process(struct sock *parent, 2528 struct sock *child, 2529 struct sk_buff *skb); 2530 extern void tcp_enter_loss(struct sock *sk, int how); 2531 extern void tcp_clear_retrans(struct tcp_opt *tp); 2532 extern void tcp_update_metrics(struct sock *sk); 2533 2534 extern void tcp_close(struct sock *sk, 2535 long timeout); 2536 extern struct sock * tcp_accept(struct sock *sk, int flags, int *err); 2537 extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); 2538 extern void tcp_write_space(struct sock *sk); 2539 2540 extern int tcp_getsockopt(struct sock *sk, int level, 2541 int optname, char *optval, 2542 int *optlen); 2543 extern int tcp_setsockopt(struct sock *sk, int level, 2544 int 
optname, char *optval, 2545 int optlen); 2546 extern void tcp_set_keepalive(struct sock *sk, int val); 2547 extern int tcp_recvmsg(struct sock *sk, 2548 struct msghdr *msg, 2549 int len, int nonblock, 2550 int flags, int *addr_len); 2551 2552 extern int tcp_listen_start(struct sock *sk); 2553 2554 extern void tcp_parse_options(struct sk_buff *skb, 2555 struct tcp_opt *tp, 2556 int estab); 2557 2558 /* 2559 * TCP v4 functions exported for the inet6 API 2560 */ 2561 2562 extern int tcp_v4_rebuild_header(struct sock *sk); 2563 2564 extern int tcp_v4_build_header(struct sock *sk, 2565 struct sk_buff *skb); 2566 2567 extern void tcp_v4_send_check(struct sock *sk, 2568 struct tcphdr *th, int len, 2569 struct sk_buff *skb); 2570 2571 extern int tcp_v4_conn_request(struct sock *sk, 2572 struct sk_buff *skb); 2573 2574 extern struct sock * tcp_create_openreq_child(struct sock *sk, 2575 struct open_request *req, 2576 struct sk_buff *skb); 2577 2578 extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, 2579 struct sk_buff *skb, 2580 struct open_request *req, 2581 struct dst_entry *dst); 2582 2583 extern int tcp_v4_do_rcv(struct sock *sk, 2584 struct sk_buff *skb); 2585 2586 extern int tcp_v4_connect(struct sock *sk, 2587 struct sockaddr *uaddr, 2588 int addr_len); 2589 2590 extern int tcp_connect(struct sock *sk); 2591 2592 extern struct sk_buff * tcp_make_synack(struct sock *sk, 2593 struct dst_entry *dst, 2594 struct open_request *req); 2595 2596 extern int tcp_disconnect(struct sock *sk, int flags); 2597 2598 extern void tcp_unhash(struct sock *sk); 2599 2600 extern int tcp_v4_hash_connecting(struct sock *sk); 2601 2602 2603 /* From syncookies.c */ 2604 extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, 2605 struct ip_options *opt); 2606 extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, 2607 __u16 *mss); 2608 2609 /* tcp_output.c */ 2610 2611 extern int tcp_write_xmit(struct sock *, int nonagle); 2612 extern int 
tcp_retransmit_skb(struct sock *, struct sk_buff *); 2613 extern void tcp_xmit_retransmit_queue(struct sock *); 2614 extern void tcp_simple_retransmit(struct sock *); 2615 2616 extern void tcp_send_probe0(struct sock *); 2617 extern void tcp_send_partial(struct sock *); 2618 extern int tcp_write_wakeup(struct sock *); 2619 extern void tcp_send_fin(struct sock *sk); 2620 extern void tcp_send_active_reset(struct sock *sk, int priority); 2621 extern int tcp_send_synack(struct sock *); 2622 extern int tcp_transmit_skb(struct sock *, struct sk_buff *); 2623 extern void tcp_send_skb(struct sock *, struct sk_buff *, int force_queue, unsigned mss_now); 2624 extern void tcp_push_one(struct sock *, unsigned mss_now); 2625 extern void tcp_send_ack(struct sock *sk); 2626 extern void tcp_send_delayed_ack(struct sock *sk); 2627 2628 /* tcp_timer.c */ 2629 extern void tcp_init_xmit_timers(struct sock *); 2630 extern void tcp_clear_xmit_timers(struct sock *); 2631 2632 extern void tcp_delete_keepalive_timer (struct sock *); 2633 extern void tcp_reset_keepalive_timer (struct sock *, unsigned long); 2634 extern int tcp_sync_mss(struct sock *sk, u32 pmtu); 2635 2636 extern const char timer_bug_msg[]; 2637 2638 /* Read 'sendfile()'-style from a TCP socket */ 2639 typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, 2640 unsigned int, size_t); 2641 extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, 2642 sk_read_actor_t recv_actor); 2643 2644 static __inline void tcp_clear_xmit_timer(struct sock *sk, int what) 2645 { 2646 #if 0 2647 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; 2648 2649 switch (what) { 2650 case TCP_TIME_RETRANS: 2651 case TCP_TIME_PROBE0: 2652 tp->pending = 0; 2653 2654 #ifdef TCP_CLEAR_TIMERS 2655 if (timer_pending(&tp->retransmit_timer) && 2656 del_timer(&tp->retransmit_timer)) 2657 __sock_put(sk); 2658 #endif 2659 break; 2660 case TCP_TIME_DACK: 2661 tp->ack.blocked = 0; 2662 tp->ack.pending = 0; 2663 2664 #ifdef TCP_CLEAR_TIMERS 2665 
if (timer_pending(&tp->delack_timer) && 2666 del_timer(&tp->delack_timer)) 2667 __sock_put(sk); 2668 #endif 2669 break; 2670 default: 2671 printk(timer_bug_msg); 2672 return; 2673 }; 2674 #endif 2675 } 2676 2677 /* 2678 * Reset the retransmission timer 2679 */ 2680 static __inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) 2681 { 2682 #if 0 2683 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; 2684 2685 if (when > TCP_RTO_MAX) { 2686 #ifdef TCP_DEBUG 2687 printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr()); 2688 #endif 2689 when = TCP_RTO_MAX; 2690 } 2691 2692 switch (what) { 2693 case TCP_TIME_RETRANS: 2694 case TCP_TIME_PROBE0: 2695 tp->pending = what; 2696 tp->timeout = jiffies+when; 2697 if (!mod_timer(&tp->retransmit_timer, tp->timeout)) 2698 sock_hold(sk); 2699 break; 2700 2701 case TCP_TIME_DACK: 2702 tp->ack.pending |= TCP_ACK_TIMER; 2703 tp->ack.timeout = jiffies+when; 2704 if (!mod_timer(&tp->delack_timer, tp->ack.timeout)) 2705 sock_hold(sk); 2706 break; 2707 2708 default: 2709 printk(KERN_DEBUG "bug: unknown timer value\n"); 2710 }; 2711 #endif 2712 } 2713 2714 /* Compute the current effective MSS, taking SACKs and IP options, 2715 * and even PMTU discovery events into account. 2716 */ 2717 2718 static __inline unsigned int tcp_current_mss(struct sock *sk) 2719 { 2720 #if 0 2721 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; 2722 struct dst_entry *dst = __sk_dst_get(sk); 2723 int mss_now = tp->mss_cache; 2724 2725 if (dst && dst->pmtu != tp->pmtu_cookie) 2726 mss_now = tcp_sync_mss(sk, dst->pmtu); 2727 2728 if (tp->eff_sacks) 2729 mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + 2730 (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)); 2731 return mss_now; 2732 #else 2733 return 0; 2734 #endif 2735 } 2736 2737 /* Initialize RCV_MSS value. 2738 * RCV_MSS is an our guess about MSS used by the peer. 2739 * We haven't any direct information about the MSS. 
2740 * It's better to underestimate the RCV_MSS rather than overestimate. 2741 * Overestimations make us ACKing less frequently than needed. 2742 * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss(). 2743 */ 2744 2745 static __inline void tcp_initialize_rcv_mss(struct sock *sk) 2746 { 2747 #if 0 2748 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; 2749 unsigned int hint = min(tp->advmss, tp->mss_cache); 2750 2751 hint = min(hint, tp->rcv_wnd/2); 2752 hint = min(hint, TCP_MIN_RCVMSS); 2753 hint = max(hint, TCP_MIN_MSS); 2754 2755 tp->ack.rcv_mss = hint; 2756 #endif 2757 } 2758 2759 static __inline void __tcp_fast_path_on(struct tcp_opt *tp, u32 snd_wnd) 2760 { 2761 #if 0 2762 tp->pred_flags = htonl((tp->tcp_header_len << 26) | 2763 ntohl(TCP_FLAG_ACK) | 2764 snd_wnd); 2765 #endif 2766 } 2767 2768 static __inline void tcp_fast_path_on(struct tcp_opt *tp) 2769 { 2770 #if 0 2771 __tcp_fast_path_on(tp, tp->snd_wnd>>tp->snd_wscale); 2772 #endif 2773 } 2774 2775 static __inline void tcp_fast_path_check(struct sock *sk, struct tcp_opt *tp) 2776 { 2777 #if 0 2778 if (skb_queue_len(&tp->out_of_order_queue) == 0 && 2779 tp->rcv_wnd && 2780 atomic_read(&sk->rmem_alloc) < sk->rcvbuf && 2781 !tp->urg_data) 2782 tcp_fast_path_on(tp); 2783 #endif 2784 } 2785 2786 /* Compute the actual receive window we are currently advertising. 2787 * Rcv_nxt can be after the window if our peer push more data 2788 * than the offered window. 2789 */ 2790 static __inline u32 tcp_receive_window(struct tcp_opt *tp) 2791 { 2792 #if 0 2793 s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt; 2794 2795 if (win < 0) 2796 win = 0; 2797 return (u32) win; 2798 #else 2799 return 0; 2800 #endif 2801 } 2802 2803 /* Choose a new window, without checks for shrinking, and without 2804 * scaling applied to the result. The caller does these things 2805 * if necessary. This is a "raw" window selection. 
2806 */ 2807 extern u32 __tcp_select_window(struct sock *sk); 2808 2809 /* TCP timestamps are only 32-bits, this causes a slight 2810 * complication on 64-bit systems since we store a snapshot 2811 * of jiffies in the buffer control blocks below. We decidely 2812 * only use of the low 32-bits of jiffies and hide the ugly 2813 * casts with the following macro. 2814 */ 2815 #define tcp_time_stamp ((__u32)(jiffies)) 2816 2817 /* This is what the send packet queueing engine uses to pass 2818 * TCP per-packet control information to the transmission 2819 * code. We also store the host-order sequence numbers in 2820 * here too. This is 36 bytes on 32-bit architectures, 2821 * 40 bytes on 64-bit machines, if this grows please adjust 2822 * skbuff.h:skbuff->cb[xxx] size appropriately. 2823 */ 2824 struct tcp_skb_cb { 2825 union { 2826 #if 0 2827 struct inet_skb_parm h4; 2828 #endif 2829 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 2830 struct inet6_skb_parm h6; 2831 #endif 2832 } header; /* For incoming frames */ 2833 __u32 seq; /* Starting sequence number */ 2834 __u32 end_seq; /* SEQ + FIN + SYN + datalen */ 2835 __u32 when; /* used to compute rtt's */ 2836 __u8 flags; /* TCP header flags. */ 2837 2838 /* NOTE: These must match up to the flags byte in a 2839 * real TCP header. 2840 */ 2841 #define TCPCB_FLAG_FIN 0x01 2842 #define TCPCB_FLAG_SYN 0x02 2843 #define TCPCB_FLAG_RST 0x04 2844 #define TCPCB_FLAG_PSH 0x08 2845 #define TCPCB_FLAG_ACK 0x10 2846 #define TCPCB_FLAG_URG 0x20 2847 #define TCPCB_FLAG_ECE 0x40 2848 #define TCPCB_FLAG_CWR 0x80 2849 2850 __u8 sacked; /* State flags for SACK/FACK. 
*/ 2851 #define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */ 2852 #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ 2853 #define TCPCB_LOST 0x04 /* SKB is lost */ 2854 #define TCPCB_TAGBITS 0x07 /* All tag bits */ 2855 2856 #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ 2857 #define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS) 2858 2859 #define TCPCB_URG 0x20 /* Urgent pointer advenced here */ 2860 2861 #define TCPCB_AT_TAIL (TCPCB_URG) 2862 2863 __u16 urg_ptr; /* Valid w/URG flags is set. */ 2864 __u32 ack_seq; /* Sequence number ACK'd */ 2865 }; 2866 2867 #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) 2868 2869 #define for_retrans_queue(skb, sk, tp) \ 2870 for (skb = (sk)->write_queue.next; \ 2871 (skb != (tp)->send_head) && \ 2872 (skb != (struct sk_buff *)&(sk)->write_queue); \ 2873 skb=skb->next) 2874 2875 2876 //#include <net/tcp_ecn.h> 2877 2878 2879 /* 2880 * Compute minimal free write space needed to queue new packets. 2881 */ 2882 static __inline int tcp_min_write_space(struct sock *sk) 2883 { 2884 #if 0 2885 return sk->wmem_queued/2; 2886 #else 2887 return 0; 2888 #endif 2889 } 2890 2891 static __inline int tcp_wspace(struct sock *sk) 2892 { 2893 #if 0 2894 return sk->sndbuf - sk->wmem_queued; 2895 #else 2896 return 0; 2897 #endif 2898 } 2899 2900 2901 /* This determines how many packets are "in the network" to the best 2902 * of our knowledge. In many cases it is conservative, but where 2903 * detailed information is available from the receiver (via SACK 2904 * blocks etc.) we can make more aggressive calculations. 2905 * 2906 * Use this for decisions involving congestion control, use just 2907 * tp->packets_out to determine if the send queue is empty or not. 
2908 * 2909 * Read this equation as: 2910 * 2911 * "Packets sent once on transmission queue" MINUS 2912 * "Packets left network, but not honestly ACKed yet" PLUS 2913 * "Packets fast retransmitted" 2914 */ 2915 static __inline unsigned int tcp_packets_in_flight(struct tcp_opt *tp) 2916 { 2917 #if 0 2918 return tp->packets_out - tp->left_out + tp->retrans_out; 2919 #else 2920 return 0; 2921 #endif 2922 } 2923 2924 /* Recalculate snd_ssthresh, we want to set it to: 2925 * 2926 * one half the current congestion window, but no 2927 * less than two segments 2928 */ 2929 static __inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp) 2930 { 2931 #if 0 2932 return max(tp->snd_cwnd >> 1U, 2U); 2933 #else 2934 return 0; 2935 #endif 2936 } 2937 2938 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. 2939 * The exception is rate halving phase, when cwnd is decreasing towards 2940 * ssthresh. 2941 */ 2942 static __inline __u32 tcp_current_ssthresh(struct tcp_opt *tp) 2943 { 2944 #if 0 2945 if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) 2946 return tp->snd_ssthresh; 2947 else 2948 return max(tp->snd_ssthresh, 2949 ((tp->snd_cwnd >> 1) + 2950 (tp->snd_cwnd >> 2))); 2951 #else 2952 return 0; 2953 #endif 2954 } 2955 2956 static __inline void tcp_sync_left_out(struct tcp_opt *tp) 2957 { 2958 #if 0 2959 if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out) 2960 tp->sacked_out = tp->packets_out - tp->lost_out; 2961 tp->left_out = tp->sacked_out + tp->lost_out; 2962 #endif 2963 } 2964 2965 extern void tcp_cwnd_application_limited(struct sock *sk); 2966 2967 /* Congestion window validation. (RFC2861) */ 2968 2969 static __inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp) 2970 { 2971 #if 0 2972 if (tp->packets_out >= tp->snd_cwnd) { 2973 /* Network is feed fully. */ 2974 tp->snd_cwnd_used = 0; 2975 tp->snd_cwnd_stamp = tcp_time_stamp; 2976 } else { 2977 /* Network starves. 
*/ 2978 if (tp->packets_out > tp->snd_cwnd_used) 2979 tp->snd_cwnd_used = tp->packets_out; 2980 2981 if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto) 2982 tcp_cwnd_application_limited(sk); 2983 } 2984 #endif 2985 } 2986 2987 /* Set slow start threshould and cwnd not falling to slow start */ 2988 static __inline void __tcp_enter_cwr(struct tcp_opt *tp) 2989 { 2990 #if 0 2991 tp->undo_marker = 0; 2992 tp->snd_ssthresh = tcp_recalc_ssthresh(tp); 2993 tp->snd_cwnd = min(tp->snd_cwnd, 2994 tcp_packets_in_flight(tp) + 1U); 2995 tp->snd_cwnd_cnt = 0; 2996 tp->high_seq = tp->snd_nxt; 2997 tp->snd_cwnd_stamp = tcp_time_stamp; 2998 TCP_ECN_queue_cwr(tp); 2999 #endif 3000 } 3001 3002 static __inline void tcp_enter_cwr(struct tcp_opt *tp) 3003 { 3004 #if 0 3005 tp->prior_ssthresh = 0; 3006 if (tp->ca_state < TCP_CA_CWR) { 3007 __tcp_enter_cwr(tp); 3008 tp->ca_state = TCP_CA_CWR; 3009 } 3010 #endif 3011 } 3012 3013 extern __u32 tcp_init_cwnd(struct tcp_opt *tp); 3014 3015 /* Slow start with delack produces 3 packets of burst, so that 3016 * it is safe "de facto". 3017 */ 3018 static __inline __u32 tcp_max_burst(struct tcp_opt *tp) 3019 { 3020 return 3; 3021 } 3022 3023 static __inline__ int tcp_minshall_check(struct tcp_opt *tp) 3024 { 3025 #if 0 3026 return after(tp->snd_sml,tp->snd_una) && 3027 !after(tp->snd_sml, tp->snd_nxt); 3028 #else 3029 return 0; 3030 #endif 3031 } 3032 3033 static __inline void tcp_minshall_update(struct tcp_opt *tp, int mss, struct sk_buff *skb) 3034 { 3035 #if 0 3036 if (skb->len < mss) 3037 tp->snd_sml = TCP_SKB_CB(skb)->end_seq; 3038 #endif 3039 } 3040 3041 /* Return 0, if packet can be sent now without violation Nagle's rules: 3042 1. It is full sized. 3043 2. Or it contains FIN. 3044 3. Or TCP_NODELAY was set. 3045 4. Or TCP_CORK is not set, and all sent packets are ACKed. 3046 With Minshall's modification: all sent small packets are ACKed. 
3047 */ 3048 3049 static __inline int 3050 tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int nonagle) 3051 { 3052 #if 0 3053 return (skb->len < mss_now && 3054 !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && 3055 (nonagle == 2 || 3056 (!nonagle && 3057 tp->packets_out && 3058 tcp_minshall_check(tp)))); 3059 #else 3060 return 0; 3061 #endif 3062 } 3063 3064 /* This checks if the data bearing packet SKB (usually tp->send_head) 3065 * should be put on the wire right now. 3066 */ 3067 static __inline int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb, 3068 unsigned cur_mss, int nonagle) 3069 { 3070 #if 0 3071 /* RFC 1122 - section 4.2.3.4 3072 * 3073 * We must queue if 3074 * 3075 * a) The right edge of this frame exceeds the window 3076 * b) There are packets in flight and we have a small segment 3077 * [SWS avoidance and Nagle algorithm] 3078 * (part of SWS is done on packetization) 3079 * Minshall version sounds: there are no _small_ 3080 * segments in flight. (tcp_nagle_check) 3081 * c) We have too many packets 'in flight' 3082 * 3083 * Don't use the nagle rule for urgent data (or 3084 * for the final FIN -DaveM). 3085 * 3086 * Also, Nagle rule does not apply to frames, which 3087 * sit in the middle of queue (they have no chances 3088 * to get new data) and if room at tail of skb is 3089 * not enough to save something seriously (<32 for now). 3090 */ 3091 3092 /* Don't be strict about the congestion window for the 3093 * final FIN frame. 
-DaveM 3094 */ 3095 return ((nonagle==1 || tp->urg_mode 3096 || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) && 3097 ((tcp_packets_in_flight(tp) < tp->snd_cwnd) || 3098 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) && 3099 !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)); 3100 #else 3101 return 0; 3102 #endif 3103 } 3104 3105 static __inline void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp) 3106 { 3107 #if 0 3108 if (!tp->packets_out && !tp->pending) 3109 tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); 3110 #endif 3111 } 3112 3113 static __inline int tcp_skb_is_last(struct sock *sk, struct sk_buff *skb) 3114 { 3115 #if 0 3116 return (skb->next == (struct sk_buff*)&sk->write_queue); 3117 #else 3118 return 0; 3119 #endif 3120 } 3121 3122 /* Push out any pending frames which were held back due to 3123 * TCP_CORK or attempt at coalescing tiny packets. 3124 * The socket must be locked by the caller. 3125 */ 3126 static __inline void __tcp_push_pending_frames(struct sock *sk, 3127 struct tcp_opt *tp, 3128 unsigned cur_mss, 3129 int nonagle) 3130 { 3131 #if 0 3132 struct sk_buff *skb = tp->send_head; 3133 3134 if (skb) { 3135 if (!tcp_skb_is_last(sk, skb)) 3136 nonagle = 1; 3137 if (!tcp_snd_test(tp, skb, cur_mss, nonagle) || 3138 tcp_write_xmit(sk, nonagle)) 3139 tcp_check_probe_timer(sk, tp); 3140 } 3141 tcp_cwnd_validate(sk, tp); 3142 #endif 3143 } 3144 3145 static __inline void tcp_push_pending_frames(struct sock *sk, 3146 struct tcp_opt *tp) 3147 { 3148 #if 0 3149 __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk), tp->nonagle); 3150 #endif 3151 } 3152 3153 static __inline int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp) 3154 { 3155 #if 0 3156 struct sk_buff *skb = tp->send_head; 3157 3158 return (skb && 3159 tcp_snd_test(tp, skb, tcp_current_mss(sk), 3160 tcp_skb_is_last(sk, skb) ? 
1 : tp->nonagle)); 3161 #else 3162 return 0; 3163 #endif 3164 } 3165 3166 static __inline void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq) 3167 { 3168 #if 0 3169 tp->snd_wl1 = seq; 3170 #endif 3171 } 3172 3173 static __inline void tcp_update_wl(struct tcp_opt *tp, u32 ack, u32 seq) 3174 { 3175 #if 0 3176 tp->snd_wl1 = seq; 3177 #endif 3178 } 3179 3180 extern void tcp_destroy_sock(struct sock *sk); 3181 3182 3183 /* 3184 * Calculate(/check) TCP checksum 3185 */ 3186 static __inline u16 tcp_v4_check(struct tcphdr *th, int len, 3187 unsigned long saddr, unsigned long daddr, 3188 unsigned long base) 3189 { 3190 #if 0 3191 return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base); 3192 #else 3193 return 0; 3194 #endif 3195 } 3196 3197 static __inline int __tcp_checksum_complete(struct sk_buff *skb) 3198 { 3199 #if 0 3200 return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); 3201 #else 3202 return 0; 3203 #endif 3204 } 3205 3206 static __inline int tcp_checksum_complete(struct sk_buff *skb) 3207 { 3208 #if 0 3209 return skb->ip_summed != CHECKSUM_UNNECESSARY && 3210 __tcp_checksum_complete(skb); 3211 #else 3212 return 0; 3213 #endif 3214 } 3215 3216 /* Prequeue for VJ style copy to user, combined with checksumming. */ 3217 3218 static __inline void tcp_prequeue_init(struct tcp_opt *tp) 3219 { 3220 #if 0 3221 tp->ucopy.task = NULL; 3222 tp->ucopy.len = 0; 3223 tp->ucopy.memory = 0; 3224 skb_queue_head_init(&tp->ucopy.prequeue); 3225 #endif 3226 } 3227 3228 /* Packet is added to VJ-style prequeue for processing in process 3229 * context, if a reader task is waiting. Apparently, this exciting 3230 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) 3231 * failed somewhere. Latency? Burstiness? Well, at least now we will 3232 * see, why it failed. 8)8) --ANK 3233 * 3234 * NOTE: is this not too big to inline? 
3235 */ 3236 static __inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) 3237 { 3238 #if 0 3239 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; 3240 3241 if (tp->ucopy.task) { 3242 __skb_queue_tail(&tp->ucopy.prequeue, skb); 3243 tp->ucopy.memory += skb->truesize; 3244 if (tp->ucopy.memory > sk->rcvbuf) { 3245 struct sk_buff *skb1; 3246 3247 if (sk->lock.users) 3248 out_of_line_bug(); 3249 3250 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { 3251 sk->backlog_rcv(sk, skb1); 3252 NET_INC_STATS_BH(TCPPrequeueDropped); 3253 } 3254 3255 tp->ucopy.memory = 0; 3256 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { 3257 wake_up_interruptible(sk->sleep); 3258 if (!tcp_ack_scheduled(tp)) 3259 tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4); 3260 } 3261 return 1; 3262 } 3263 return 0; 3264 #else 3265 return 0; 3266 #endif 3267 } 3268 3269 3270 #undef STATE_TRACE 3271 3272 #ifdef STATE_TRACE 3273 static char *statename[]={ 3274 "Unused","Established","Syn Sent","Syn Recv", 3275 "Fin Wait 1","Fin Wait 2","Time Wait", "Close", 3276 "Close Wait","Last ACK","Listen","Closing" 3277 }; 3278 #endif 3279 3280 static __inline void tcp_set_state(struct sock *sk, int state) 3281 { 3282 #if 0 3283 int oldstate = sk->state; 3284 3285 switch (state) { 3286 case TCP_ESTABLISHED: 3287 if (oldstate != TCP_ESTABLISHED) 3288 TCP_INC_STATS(TcpCurrEstab); 3289 break; 3290 3291 case TCP_CLOSE: 3292 sk->prot->unhash(sk); 3293 if (sk->prev && !(sk->userlocks&SOCK_BINDPORT_LOCK)) 3294 tcp_put_port(sk); 3295 /* fall through */ 3296 default: 3297 if (oldstate==TCP_ESTABLISHED) 3298 tcp_statistics[smp_processor_id()*2+!in_softirq()].TcpCurrEstab--; 3299 } 3300 3301 /* Change state AFTER socket is unhashed to avoid closed 3302 * socket sitting in hash tables. 
3303 */ 3304 sk->state = state; 3305 3306 #ifdef STATE_TRACE 3307 SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]); 3308 #endif 3309 #endif 3310 } 3311 3312 static __inline void tcp_done(struct sock *sk) 3313 { 3314 #if 0 3315 tcp_set_state(sk, TCP_CLOSE); 3316 tcp_clear_xmit_timers(sk); 3317 3318 sk->shutdown = SHUTDOWN_MASK; 3319 3320 if (!sk->dead) 3321 sk->state_change(sk); 3322 else 3323 tcp_destroy_sock(sk); 3324 #endif 3325 } 3326 3327 static __inline void tcp_sack_reset(struct tcp_opt *tp) 3328 { 3329 #if 0 3330 tp->dsack = 0; 3331 tp->eff_sacks = 0; 3332 tp->num_sacks = 0; 3333 #endif 3334 } 3335 3336 static __inline void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *tp, __u32 tstamp) 3337 { 3338 #if 0 3339 if (tp->tstamp_ok) { 3340 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | 3341 (TCPOPT_NOP << 16) | 3342 (TCPOPT_TIMESTAMP << 8) | 3343 TCPOLEN_TIMESTAMP); 3344 *ptr++ = htonl(tstamp); 3345 *ptr++ = htonl(tp->ts_recent); 3346 } 3347 if (tp->eff_sacks) { 3348 struct tcp_sack_block *sp = tp->dsack ? tp->duplicate_sack : tp->selective_acks; 3349 int this_sack; 3350 3351 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | 3352 (TCPOPT_NOP << 16) | 3353 (TCPOPT_SACK << 8) | 3354 (TCPOLEN_SACK_BASE + 3355 (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK))); 3356 for(this_sack = 0; this_sack < tp->eff_sacks; this_sack++) { 3357 *ptr++ = htonl(sp[this_sack].start_seq); 3358 *ptr++ = htonl(sp[this_sack].end_seq); 3359 } 3360 if (tp->dsack) { 3361 tp->dsack = 0; 3362 tp->eff_sacks--; 3363 } 3364 } 3365 #endif 3366 } 3367 3368 /* Construct a tcp options header for a SYN or SYN_ACK packet. 3369 * If this is every changed make sure to change the definition of 3370 * MAX_SYN_SIZE to match the new maximum number of options that you 3371 * can generate. 
3372 */ 3373 static __inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack, 3374 int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent) 3375 { 3376 #if 0 3377 /* We always get an MSS option. 3378 * The option bytes which will be seen in normal data 3379 * packets should timestamps be used, must be in the MSS 3380 * advertised. But we subtract them from tp->mss_cache so 3381 * that calculations in tcp_sendmsg are simpler etc. 3382 * So account for this fact here if necessary. If we 3383 * don't do this correctly, as a receiver we won't 3384 * recognize data packets as being full sized when we 3385 * should, and thus we won't abide by the delayed ACK 3386 * rules correctly. 3387 * SACKs don't matter, we never delay an ACK when we 3388 * have any of those going out. 3389 */ 3390 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); 3391 if (ts) { 3392 if(sack) 3393 *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | 3394 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 3395 else 3396 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 3397 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 3398 *ptr++ = htonl(tstamp); /* TSVAL */ 3399 *ptr++ = htonl(ts_recent); /* TSECR */ 3400 } else if(sack) 3401 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 3402 (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); 3403 if (offer_wscale) 3404 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale)); 3405 #endif 3406 } 3407 3408 /* Determine a window scaling and initial window to offer. 3409 * Based on the assumption that the given amount of space 3410 * will be offered. Store the results in the tp structure. 3411 * NOTE: for smooth operation initial space offering should 3412 * be a multiple of mss if possible. We assume here that mss >= 1. 3413 * This MUST be enforced by all callers. 
3414 */ 3415 static __inline void tcp_select_initial_window(int __space, __u32 mss, 3416 __u32 *rcv_wnd, 3417 __u32 *window_clamp, 3418 int wscale_ok, 3419 __u8 *rcv_wscale) 3420 { 3421 #if 0 3422 unsigned int space = (__space < 0 ? 0 : __space); 3423 3424 /* If no clamp set the clamp to the max possible scaled window */ 3425 if (*window_clamp == 0) 3426 (*window_clamp) = (65535 << 14); 3427 space = min(*window_clamp, space); 3428 3429 /* Quantize space offering to a multiple of mss if possible. */ 3430 if (space > mss) 3431 space = (space / mss) * mss; 3432 3433 /* NOTE: offering an initial window larger than 32767 3434 * will break some buggy TCP stacks. We try to be nice. 3435 * If we are not window scaling, then this truncates 3436 * our initial window offering to 32k. There should also 3437 * be a sysctl option to stop being nice. 3438 */ 3439 (*rcv_wnd) = min(space, MAX_TCP_WINDOW); 3440 (*rcv_wscale) = 0; 3441 if (wscale_ok) { 3442 /* See RFC1323 for an explanation of the limit to 14 */ 3443 while (space > 65535 && (*rcv_wscale) < 14) { 3444 space >>= 1; 3445 (*rcv_wscale)++; 3446 } 3447 if (*rcv_wscale && sysctl_tcp_app_win && space>=mss && 3448 space - max((space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2) 3449 (*rcv_wscale)--; 3450 } 3451 3452 /* Set initial window to value enough for senders, 3453 * following RFC1414. Senders, not following this RFC, 3454 * will be satisfied with 2. 3455 */ 3456 if (mss > (1<<*rcv_wscale)) { 3457 int init_cwnd = 4; 3458 if (mss > 1460*3) 3459 init_cwnd = 2; 3460 else if (mss > 1460) 3461 init_cwnd = 3; 3462 if (*rcv_wnd > init_cwnd*mss) 3463 *rcv_wnd = init_cwnd*mss; 3464 } 3465 /* Set the clamp no higher than max representable value */ 3466 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); 3467 #endif 3468 } 3469 3470 static __inline int tcp_win_from_space(int space) 3471 { 3472 #if 0 3473 return sysctl_tcp_adv_win_scale<=0 ? 
3474 (space>>(-sysctl_tcp_adv_win_scale)) : 3475 space - (space>>sysctl_tcp_adv_win_scale); 3476 #else 3477 return 0; 3478 #endif 3479 } 3480 3481 /* Note: caller must be prepared to deal with negative returns */ 3482 static __inline int tcp_space(struct sock *sk) 3483 { 3484 #if 0 3485 return tcp_win_from_space(sk->rcvbuf - atomic_read(&sk->rmem_alloc)); 3486 #else 3487 return 0; 3488 #endif 3489 } 3490 3491 static __inline int tcp_full_space( struct sock *sk) 3492 { 3493 #if 0 3494 return tcp_win_from_space(sk->rcvbuf); 3495 #else 3496 return 0; 3497 #endif 3498 } 3499 3500 static __inline void tcp_acceptq_removed(struct sock *sk) 3501 { 3502 #if 0 3503 sk->ack_backlog--; 3504 #endif 3505 } 3506 3507 static __inline void tcp_acceptq_added(struct sock *sk) 3508 { 3509 #if 0 3510 sk->ack_backlog++; 3511 #endif 3512 } 3513 3514 static __inline int tcp_acceptq_is_full(struct sock *sk) 3515 { 3516 #if 0 3517 return sk->ack_backlog > sk->max_ack_backlog; 3518 #else 3519 return 0; 3520 #endif 3521 } 3522 3523 static __inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req, 3524 struct sock *child) 3525 { 3526 #if 0 3527 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; 3528 3529 req->sk = child; 3530 tcp_acceptq_added(sk); 3531 3532 if (!tp->accept_queue_tail) { 3533 tp->accept_queue = req; 3534 } else { 3535 tp->accept_queue_tail->dl_next = req; 3536 } 3537 tp->accept_queue_tail = req; 3538 req->dl_next = NULL; 3539 #endif 3540 } 3541 3542 struct tcp_listen_opt 3543 { 3544 u8 max_qlen_log; /* log_2 of maximal queued SYNs */ 3545 int qlen; 3546 int qlen_young; 3547 int clock_hand; 3548 struct open_request *syn_table[TCP_SYNQ_HSIZE]; 3549 }; 3550 3551 static __inline void 3552 tcp_synq_removed(struct sock *sk, struct open_request *req) 3553 { 3554 #if 0 3555 struct tcp_listen_opt *lopt = sk->tp_pinfo.af_tcp.listen_opt; 3556 3557 if (--lopt->qlen == 0) 3558 tcp_delete_keepalive_timer(sk); 3559 if (req->retrans == 0) 3560 lopt->qlen_young--; 3561 #endif 3562 } 
3563 3564 static __inline void tcp_synq_added(struct sock *sk) 3565 { 3566 #if 0 3567 struct tcp_listen_opt *lopt = sk->tp_pinfo.af_tcp.listen_opt; 3568 3569 if (lopt->qlen++ == 0) 3570 tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); 3571 lopt->qlen_young++; 3572 #endif 3573 } 3574 3575 static __inline int tcp_synq_len(struct sock *sk) 3576 { 3577 #if 0 3578 return sk->tp_pinfo.af_tcp.listen_opt->qlen; 3579 #else 3580 return 0; 3581 #endif 3582 } 3583 3584 static __inline int tcp_synq_young(struct sock *sk) 3585 { 3586 #if 0 3587 return sk->tp_pinfo.af_tcp.listen_opt->qlen_young; 3588 #else 3589 return 0; 3590 #endif 3591 } 3592 3593 static __inline int tcp_synq_is_full(struct sock *sk) 3594 { 3595 #if 0 3596 return tcp_synq_len(sk)>>sk->tp_pinfo.af_tcp.listen_opt->max_qlen_log; 3597 #else 3598 return 0; 3599 #endif 3600 } 3601 3602 static __inline void tcp_synq_unlink(struct tcp_opt *tp, struct open_request *req, 3603 struct open_request **prev) 3604 { 3605 #if 0 3606 write_lock(&tp->syn_wait_lock); 3607 *prev = req->dl_next; 3608 write_unlock(&tp->syn_wait_lock); 3609 #endif 3610 } 3611 3612 static __inline void tcp_synq_drop(struct sock *sk, struct open_request *req, 3613 struct open_request **prev) 3614 { 3615 #if 0 3616 tcp_synq_unlink(&sk->tp_pinfo.af_tcp, req, prev); 3617 tcp_synq_removed(sk, req); 3618 tcp_openreq_free(req); 3619 #endif 3620 } 3621 3622 static __inline void tcp_openreq_init(struct open_request *req, 3623 struct tcp_opt *tp, 3624 struct sk_buff *skb) 3625 { 3626 #if 0 3627 req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ 3628 req->rcv_isn = TCP_SKB_CB(skb)->seq; 3629 req->mss = tp->mss_clamp; 3630 req->ts_recent = tp->saw_tstamp ? 
tp->rcv_tsval : 0; 3631 req->tstamp_ok = tp->tstamp_ok; 3632 req->sack_ok = tp->sack_ok; 3633 req->snd_wscale = tp->snd_wscale; 3634 req->wscale_ok = tp->wscale_ok; 3635 req->acked = 0; 3636 req->ecn_ok = 0; 3637 req->rmt_port = skb->h.th->source; 3638 #endif 3639 } 3640 3641 #define TCP_MEM_QUANTUM ((int)PAGE_SIZE) 3642 3643 static __inline void tcp_free_skb(struct sock *sk, struct sk_buff *skb) 3644 { 3645 #if 0 3646 sk->tp_pinfo.af_tcp.queue_shrunk = 1; 3647 sk->wmem_queued -= skb->truesize; 3648 sk->forward_alloc += skb->truesize; 3649 __kfree_skb(skb); 3650 #endif 3651 } 3652 3653 static __inline void tcp_charge_skb(struct sock *sk, struct sk_buff *skb) 3654 { 3655 #if 0 3656 sk->wmem_queued += skb->truesize; 3657 sk->forward_alloc -= skb->truesize; 3658 #endif 3659 } 3660 3661 extern void __tcp_mem_reclaim(struct sock *sk); 3662 extern int tcp_mem_schedule(struct sock *sk, int size, int kind); 3663 3664 static __inline void tcp_mem_reclaim(struct sock *sk) 3665 { 3666 #if 0 3667 if (sk->forward_alloc >= TCP_MEM_QUANTUM) 3668 __tcp_mem_reclaim(sk); 3669 #endif 3670 } 3671 3672 static __inline void tcp_enter_memory_pressure(void) 3673 { 3674 #if 0 3675 if (!tcp_memory_pressure) { 3676 NET_INC_STATS(TCPMemoryPressures); 3677 tcp_memory_pressure = 1; 3678 } 3679 #endif 3680 } 3681 3682 static __inline void tcp_moderate_sndbuf(struct sock *sk) 3683 { 3684 #if 0 3685 if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) { 3686 sk->sndbuf = min(sk->sndbuf, sk->wmem_queued/2); 3687 sk->sndbuf = max(sk->sndbuf, SOCK_MIN_SNDBUF); 3688 } 3689 #endif 3690 } 3691 3692 static __inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem, int gfp) 3693 { 3694 #if 0 3695 struct sk_buff *skb = alloc_skb(size+MAX_TCP_HEADER, gfp); 3696 3697 if (skb) { 3698 skb->truesize += mem; 3699 if (sk->forward_alloc >= (int)skb->truesize || 3700 tcp_mem_schedule(sk, skb->truesize, 0)) { 3701 skb_reserve(skb, MAX_TCP_HEADER); 3702 return skb; 3703 } 3704 __kfree_skb(skb); 3705 } else { 3706 
tcp_enter_memory_pressure(); 3707 tcp_moderate_sndbuf(sk); 3708 } 3709 return NULL; 3710 #else 3711 return NULL; 3712 #endif 3713 } 3714 3715 static __inline struct sk_buff *tcp_alloc_skb(struct sock *sk, int size, int gfp) 3716 { 3717 #if 0 3718 return tcp_alloc_pskb(sk, size, 0, gfp); 3719 #else 3720 return NULL; 3721 #endif 3722 } 3723 3724 static __inline struct page * tcp_alloc_page(struct sock *sk) 3725 { 3726 #if 0 3727 if (sk->forward_alloc >= (int)PAGE_SIZE || 3728 tcp_mem_schedule(sk, PAGE_SIZE, 0)) { 3729 struct page *page = alloc_pages(sk->allocation, 0); 3730 if (page) 3731 return page; 3732 } 3733 tcp_enter_memory_pressure(); 3734 tcp_moderate_sndbuf(sk); 3735 return NULL; 3736 #else 3737 return NULL; 3738 #endif 3739 } 3740 3741 static __inline void tcp_writequeue_purge(struct sock *sk) 3742 { 3743 #if 0 3744 struct sk_buff *skb; 3745 3746 while ((skb = __skb_dequeue(&sk->write_queue)) != NULL) 3747 tcp_free_skb(sk, skb); 3748 tcp_mem_reclaim(sk); 3749 #endif 3750 } 3751 3752 extern void tcp_rfree(struct sk_buff *skb); 3753 3754 static __inline void tcp_set_owner_r(struct sk_buff *skb, struct sock *sk) 3755 { 3756 #if 0 3757 skb->sk = sk; 3758 skb->destructor = tcp_rfree; 3759 atomic_add(skb->truesize, &sk->rmem_alloc); 3760 sk->forward_alloc -= skb->truesize; 3761 #endif 3762 } 3763 3764 extern void tcp_listen_wlock(void); 3765 3766 /* - We may sleep inside this lock. 3767 * - If sleeping is not required (or called from BH), 3768 * use plain read_(un)lock(&tcp_lhash_lock). 
3769 */ 3770 3771 static __inline void tcp_listen_lock(void) 3772 { 3773 #if 0 3774 /* read_lock synchronizes to candidates to writers */ 3775 read_lock(&tcp_lhash_lock); 3776 atomic_inc(&tcp_lhash_users); 3777 read_unlock(&tcp_lhash_lock); 3778 #endif 3779 } 3780 3781 static __inline void tcp_listen_unlock(void) 3782 { 3783 #if 0 3784 if (atomic_dec_and_test(&tcp_lhash_users)) 3785 wake_up(&tcp_lhash_wait); 3786 #endif 3787 } 3788 3789 static __inline int keepalive_intvl_when(struct tcp_opt *tp) 3790 { 3791 #if 0 3792 return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; 3793 #else 3794 return 0; 3795 #endif 3796 } 3797 3798 static __inline int keepalive_time_when(struct tcp_opt *tp) 3799 { 3800 #if 0 3801 return tp->keepalive_time ? : sysctl_tcp_keepalive_time; 3802 #else 3803 return 0; 3804 #endif 3805 } 3806 3807 static __inline int tcp_fin_time(struct tcp_opt *tp) 3808 { 3809 #if 0 3810 int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout; 3811 3812 if (fin_timeout < (tp->rto<<2) - (tp->rto>>1)) 3813 fin_timeout = (tp->rto<<2) - (tp->rto>>1); 3814 3815 return fin_timeout; 3816 #else 3817 return 0; 3818 #endif 3819 } 3820 3821 static __inline int tcp_paws_check(struct tcp_opt *tp, int rst) 3822 { 3823 #if 0 3824 if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0) 3825 return 0; 3826 if (xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS) 3827 return 0; 3828 3829 /* RST segments are not recommended to carry timestamp, 3830 and, if they do, it is recommended to ignore PAWS because 3831 "their cleanup function should take precedence over timestamps." 3832 Certainly, it is mistake. It is necessary to understand the reasons 3833 of this constraint to relax it: if peer reboots, clock may go 3834 out-of-sync and half-open connections will not be reset. 3835 Actually, the problem would be not existing if all 3836 the implementations followed draft about maintaining clock 3837 via reboots. Linux-2.2 DOES NOT! 
3838 3839 However, we can relax time bounds for RST segments to MSL. 3840 */ 3841 if (rst && xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_MSL) 3842 return 0; 3843 return 1; 3844 #else 3845 return 0; 3846 #endif 3847 } 3848 3849 #define TCP_CHECK_TIMER(sk) do { } while (0) 3850 3851 #endif /* __TCPCORE_H */ 3852 3853 3854 // 3855 #endif 3856