1 /**************************************************************************
2 
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12 2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: cxgb_lro.c,v 1.2 2011/05/18 01:01:59 dyoung Exp $");
32 
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/conf.h>
38 #include <sys/bus.h>
39 #include <sys/queue.h>
40 
41 #include <netinet/in_systm.h>
42 #include <netinet/in.h>
43 #include <netinet/ip.h>
44 #include <netinet/tcp.h>
45 
46 
47 #ifdef CONFIG_DEFINED
48 #include <dev/pci/cxgb/cxgb_include.h>
49 
50 #include <machine/in_cksum.h>
51 #endif
52 
53 #include "cxgb_include.h"
54 
/*
 * Fallback value for the mbuf flag that lro_flush_session() sets on a
 * coalesced packet before handing it to t3_rx_eth().  Only used when the
 * included headers do not already provide M_LRO.
 */
#ifndef M_LRO
#define M_LRO    0x0200
#endif

/*
 * MBUF_HEADER_CHECK(m): debug-only sanity check of the first mbuf of a
 * packet.  Panics when the mbuf is empty, has a zero packet length, lacks
 * M_PKTHDR, or is shorter than an Ethernet header.  Expands to nothing
 * unless DEBUG is defined.
 *
 * The argument is parenthesized at every use so that any pointer
 * expression may be passed.  The former separate "first mbuf is not packet
 * header" test was dropped: a missing M_PKTHDR is already caught by the
 * first check, so (assuming panic() does not return) it was unreachable.
 */
#ifdef DEBUG
#define MBUF_HEADER_CHECK(m) do { \
    if (((m)->m_len == 0) || ((m)->m_pkthdr.len == 0) \
        || (((m)->m_flags & M_PKTHDR) == 0)) \
        panic("lro_flush_session - mbuf len=%d pktlen=%d flags=0x%x\n", \
            (m)->m_len, (m)->m_pkthdr.len, (m)->m_flags); \
    if (((m)->m_len < ETHER_HDR_LEN) || \
        ((m)->m_pkthdr.len < ETHER_HDR_LEN)) \
        panic("packet too small len=%d pktlen=%d\n", \
            (m)->m_len, (m)->m_pkthdr.len); \
} while (0)
#else
#define MBUF_HEADER_CHECK(m)
#endif
75 
/*
 * Byte offset of the IP header from the start of a received mbuf:
 * 2 leading bytes, the CPL receive header, then the Ethernet header.
 */
#define IPH_OFFSET (2 + sizeof (struct cpl_rx_pkt) + ETHER_HDR_LEN)

/*
 * Map an RSS hash to a starting slot in the session table.  The mask is
 * only a correct modulo when MAX_LRO_SES is a power of two.
 */
#define LRO_SESSION_IDX_HINT_HASH(hash) ((hash) & (MAX_LRO_SES - 1))

/* Advance a session-table index by one slot, wrapping at MAX_LRO_SES. */
#define LRO_IDX_INC(idx) ((idx) = (((idx) + 1) & (MAX_LRO_SES - 1)))
79 
80 static __inline int
lro_match(struct mbuf * m,struct ip * ih,struct tcphdr * th)81 lro_match(struct mbuf *m, struct ip *ih, struct tcphdr *th)
82 {
83     struct ip *sih = (struct ip *)(mtod(m, uint8_t *) + IPH_OFFSET);
84     struct tcphdr *sth = (struct tcphdr *) (sih + 1);
85 
86     return (th->th_sport == sth->th_sport &&
87         th->th_dport == sth->th_dport &&
88         ih->ip_src.s_addr == sih->ip_src.s_addr &&
89         ih->ip_dst.s_addr == sih->ip_dst.s_addr);
90 }
91 
92 static __inline struct t3_lro_session *
lro_lookup(struct lro_state * l,int idx,struct ip * ih,struct tcphdr * th)93 lro_lookup(struct lro_state *l, int idx, struct ip *ih, struct tcphdr *th)
94 {
95     struct t3_lro_session *s = NULL;
96     int active = l->nactive;
97 
98     while (active) {
99         s = &l->sess[idx];
100         if (s->head) {
101             if (lro_match(s->head, ih, th))
102                 break;
103             active--;
104         }
105         LRO_IDX_INC(idx);
106     }
107 
108     return (s);
109 }
110 
111 static __inline int
can_lro_packet(struct cpl_rx_pkt * cpl,unsigned int rss_hi)112 can_lro_packet(struct cpl_rx_pkt *cpl, unsigned int rss_hi)
113 {
114     struct ether_header *eh = (struct ether_header *)(cpl + 1);
115     struct ip *ih = (struct ip *)(eh + 1);
116 
117     /*
118      * XXX VLAN support?
119      */
120     if (__predict_false(G_HASHTYPE(ntohl(rss_hi)) != RSS_HASH_4_TUPLE ||
121              (*((uint8_t *)cpl + 1) & 0x90) != 0x10 ||
122              cpl->csum != 0xffff || eh->ether_type != ntohs(ETHERTYPE_IP) ||
123              ih->ip_hl != (sizeof (*ih) >> 2))) {
124         return 0;
125     }
126 
127     return 1;
128 }
129 
130 static int
can_lro_tcpsegment(struct tcphdr * th)131 can_lro_tcpsegment(struct tcphdr *th)
132 {
133     int olen = (th->th_off << 2) - sizeof (*th);
134     u8 control_bits = *((u8 *)th + 13);
135 
136     if (__predict_false((control_bits & 0xB7) != 0x10))
137         goto no_lro;
138 
139     if (olen) {
140         uint32_t *ptr = (u32 *)(th + 1);
141         if (__predict_false(olen != TCPOLEN_TSTAMP_APPA ||
142                  *ptr != ntohl((TCPOPT_NOP << 24) |
143                        (TCPOPT_NOP << 16) |
144                        (TCPOPT_TIMESTAMP << 8) |
145                         TCPOLEN_TIMESTAMP)))
146             goto no_lro;
147     }
148 
149     return 1;
150 
151  no_lro:
152     return 0;
153 }
154 
155 static __inline void
lro_new_session_init(struct t3_lro_session * s,struct mbuf * m)156 lro_new_session_init(struct t3_lro_session *s, struct mbuf *m)
157 {
158     struct ip *ih = (struct ip *)(mtod(m, uint8_t *) + IPH_OFFSET);
159     struct tcphdr *th = (struct tcphdr *) (ih + 1);
160     int ip_len = ntohs(ih->ip_len);
161 
162     DPRINTF("%s(s=%p, m=%p)\n", __func__, s, m);
163 
164     s->head = m;
165 
166     MBUF_HEADER_CHECK(m);
167     s->ip_len = ip_len;
168     s->seq = ntohl(th->th_seq) + ip_len - sizeof(*ih) - (th->th_off << 2);
169 
170 }
171 
172 static void
lro_flush_session(struct sge_qset * qs,struct t3_lro_session * s,struct mbuf * m)173 lro_flush_session(struct sge_qset *qs, struct t3_lro_session *s, struct mbuf *m)
174 {
175     struct lro_state *l = &qs->lro;
176     struct mbuf *sm = s->head;
177     struct ip *ih = (struct ip *)(mtod(sm, uint8_t *) + IPH_OFFSET);
178 
179 
180     DPRINTF("%s(qs=%p, s=%p, ", __func__,
181         qs, s);
182 
183     if (m)
184         DPRINTF("m=%p)\n", m);
185     else
186         DPRINTF("m=NULL)\n");
187 
188     ih->ip_len = htons(s->ip_len);
189     ih->ip_sum = 0;
190     ih->ip_sum = in_cksum_hdr(ih);
191 
192     MBUF_HEADER_CHECK(sm);
193 
194     sm->m_flags |= M_LRO;
195     t3_rx_eth(qs->port->adapter, &qs->rspq, sm, 2);
196 
197     if (m) {
198         s->head = m;
199         lro_new_session_init(s, m);
200     } else {
201         s->head = NULL;
202         l->nactive--;
203     }
204 
205     qs->port_stats[SGE_PSTATS_LRO_FLUSHED]++;
206 }
207 
208 static __inline struct t3_lro_session *
lro_new_session(struct sge_qset * qs,struct mbuf * m,uint32_t rss_hash)209 lro_new_session(struct sge_qset *qs, struct mbuf *m, uint32_t rss_hash)
210 {
211     struct lro_state *l = &qs->lro;
212     int idx = LRO_SESSION_IDX_HINT_HASH(rss_hash);
213     struct t3_lro_session *s = &l->sess[idx];
214 
215     DPRINTF("%s(qs=%p,  m=%p, rss_hash=0x%x)\n", __func__,
216         qs, m, rss_hash);
217 
218     if (__predict_true(!s->head))
219         goto done;
220 
221     if (l->nactive > MAX_LRO_SES)
222         panic("MAX_LRO_PER_QSET exceeded");
223 
224     if (l->nactive == MAX_LRO_SES) {
225         lro_flush_session(qs, s, m);
226         qs->port_stats[SGE_PSTATS_LRO_X_STREAMS]++;
227         return s;
228     }
229 
230     while (1) {
231         LRO_IDX_INC(idx);
232         s = &l->sess[idx];
233         if (!s->head)
234             break;
235     }
236 done:
237     lro_new_session_init(s, m);
238     l->nactive++;
239 
240     return s;
241 }
242 
243 static __inline int
lro_update_session(struct t3_lro_session * s,struct mbuf * m)244 lro_update_session(struct t3_lro_session *s, struct mbuf *m)
245 {
246     struct mbuf *sm = s->head;
247     struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(sm, uint8_t *) + 2);
248     struct cpl_rx_pkt *ncpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + 2);
249     struct ip *nih = (struct ip *)(mtod(m, uint8_t *) + IPH_OFFSET);
250     struct tcphdr *th, *nth = (struct tcphdr *)(nih + 1);
251     uint32_t seq = ntohl(nth->th_seq);
252     int plen, tcpiphlen, olen = (nth->th_off << 2) - sizeof (*nth);
253 
254 
255     DPRINTF("%s(s=%p,  m=%p)\n", __func__, s, m);
256     if (cpl->vlan_valid && cpl->vlan != ncpl->vlan) {
257         return -1;
258     }
259     if (__predict_false(seq != s->seq)) {
260         DPRINTF("sequence mismatch\n");
261         return -1;
262     }
263 
264     MBUF_HEADER_CHECK(sm);
265     th = (struct tcphdr *)(mtod(sm, uint8_t *) + IPH_OFFSET + sizeof (struct ip));
266 
267     if (olen) {
268         uint32_t *ptr = (uint32_t *)(th + 1);
269         uint32_t *nptr = (uint32_t *)(nth + 1);
270 
271         if (__predict_false(ntohl(*(ptr + 1)) > ntohl(*(nptr + 1)) ||
272                  !*(nptr + 2))) {
273             return -1;
274         }
275         *(ptr + 1) = *(nptr + 1);
276         *(ptr + 2) = *(nptr + 2);
277     }
278     th->th_ack = nth->th_ack;
279     th->th_win = nth->th_win;
280 
281     tcpiphlen = (nth->th_off << 2) + sizeof (*nih);
282     plen = ntohs(nih->ip_len) - tcpiphlen;
283     s->seq += plen;
284     s->ip_len += plen;
285     sm->m_pkthdr.len += plen;
286 
287     /*
288      * XXX FIX ME
289      *
290      *
291      */
292 
293 #if 0
294     /* XXX this I *do not* understand */
295     if (plen > skb_shinfo(s->skb)->gso_size)
296         skb_shinfo(s->skb)->gso_size = plen;
297 #endif
298     DPRINTF("m_adj(%d)\n", (int)(IPH_OFFSET + tcpiphlen));
299     m_adj(m, IPH_OFFSET + tcpiphlen);
300 #if 0
301     if (__predict_false(!skb_shinfo(s->skb)->frag_list))
302         skb_shinfo(s->skb)->frag_list = skb;
303 
304 #endif
305 
306 #if 0
307 
308     /*
309      * XXX we really need to be able to
310      * support vectors of buffers in FreeBSD
311      */
312     int nr = skb_shinfo(s->skb)->nr_frags;
313     skb_shinfo(s->skb)->frags[nr].page = frag->page;
314     skb_shinfo(s->skb)->frags[nr].page_offset =
315         frag->page_offset + IPH_OFFSET + tcpiphlen;
316     skb_shinfo(s->skb)->frags[nr].size = plen;
317     skb_shinfo(s->skb)->nr_frags = ++nr;
318 
319 #endif
320     return (0);
321 }
322 
323 void
t3_rx_eth_lro(adapter_t * adap,struct sge_rspq * rq,struct mbuf * m,int ethpad,uint32_t rss_hash,uint32_t rss_csum,int lro)324 t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
325     int ethpad, uint32_t rss_hash, uint32_t rss_csum, int lro)
326 {
327     struct sge_qset *qs = rspq_to_qset(rq);
328     struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
329     struct ether_header *eh = (struct ether_header *)(cpl + 1);
330     struct ip *ih;
331     struct tcphdr *th;
332     struct t3_lro_session *s = NULL;
333 
334     if (lro == 0)
335         goto no_lro;
336 
337     if (!can_lro_packet(cpl, rss_csum))
338         goto no_lro;
339 
340     ih = (struct ip *)(eh + 1);
341     th = (struct tcphdr *)(ih + 1);
342 
343     s = lro_lookup(&qs->lro,
344         LRO_SESSION_IDX_HINT_HASH(rss_hash), ih, th);
345 
346     if (__predict_false(!can_lro_tcpsegment(th))) {
347         goto no_lro;
348     } else if (__predict_false(!s)) {
349         s = lro_new_session(qs, m, rss_hash);
350     } else {
351         if (lro_update_session(s, m)) {
352             lro_flush_session(qs, s, m);
353         }
354 #ifdef notyet
355         if (__predict_false(s->head->m_pkthdr.len + pi->ifp->if_mtu > 65535)) {
356             lro_flush_session(qs, s, NULL);
357         }
358 #endif
359     }
360 
361     qs->port_stats[SGE_PSTATS_LRO_QUEUED]++;
362     return;
363 no_lro:
364     if (s)
365         lro_flush_session(qs, s, NULL);
366 
367     if (m->m_len == 0 || m->m_pkthdr.len == 0 || (m->m_flags & M_PKTHDR) == 0)
368         DPRINTF("rx_eth_lro mbuf len=%d pktlen=%d flags=0x%x\n",
369             m->m_len, m->m_pkthdr.len, m->m_flags);
370 
371     t3_rx_eth(adap, rq, m, ethpad);
372 }
373 
374 void
t3_lro_flush(adapter_t * adap,struct sge_qset * qs,struct lro_state * state)375 t3_lro_flush(adapter_t *adap, struct sge_qset *qs, struct lro_state *state)
376 {
377     unsigned int idx = state->active_idx;
378 
379     while (state->nactive) {
380         struct t3_lro_session *s = &state->sess[idx];
381 
382         if (s->head)
383             lro_flush_session(qs, s, NULL);
384         LRO_IDX_INC(idx);
385     }
386 }
387