1 /**************************************************************************
2
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: cxgb_lro.c,v 1.2 2011/05/18 01:01:59 dyoung Exp $");
32
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/conf.h>
38 #include <sys/bus.h>
39 #include <sys/queue.h>
40
41 #include <netinet/in_systm.h>
42 #include <netinet/in.h>
43 #include <netinet/ip.h>
44 #include <netinet/tcp.h>
45
46
47 #ifdef CONFIG_DEFINED
48 #include <dev/pci/cxgb/cxgb_include.h>
49
50 #include <machine/in_cksum.h>
51 #endif
52
53 #include "cxgb_include.h"
54
55 #ifndef M_LRO
56 #define M_LRO 0x0200
57 #endif
58
#ifdef DEBUG
/*
 * Debug-only sanity check on the first mbuf of a packet.  Panics when the
 * mbuf is empty, has a zero packet length, lacks M_PKTHDR, or is shorter
 * than an Ethernet header.
 *
 * The argument is parenthesized at every use so that any pointer-valued
 * expression may be passed.  A separate "first mbuf is not packet header"
 * check used to follow the first test; it could never fire because the
 * first test already panics on a missing M_PKTHDR, so it has been removed.
 */
#define MBUF_HEADER_CHECK(m) do { \
	if (((m)->m_len == 0) || ((m)->m_pkthdr.len == 0) \
	    || (((m)->m_flags & M_PKTHDR) == 0)) \
		panic("lro_flush_session - mbuf len=%d pktlen=%d flags=0x%x\n", \
		    (m)->m_len, (m)->m_pkthdr.len, (m)->m_flags); \
	if (((m)->m_len < ETHER_HDR_LEN) || ((m)->m_pkthdr.len < ETHER_HDR_LEN)) \
		panic("packet too small len=%d pktlen=%d\n", \
		    (m)->m_len, (m)->m_pkthdr.len); \
} while (0)
#else
/* Expands to a single empty statement so it is safe in any statement slot. */
#define MBUF_HEADER_CHECK(m) do { } while (0)
#endif
75
/*
 * Byte offset of the IP header from the start of a received mbuf: a 2-byte
 * pad, the CPL rx header and the Ethernet header.
 * NOTE(review): the literal 2 matches the pad value this file passes to
 * t3_rx_eth() when flushing; confirm it always equals the caller's ethpad.
 */
#define IPH_OFFSET (2 + sizeof (struct cpl_rx_pkt) + ETHER_HDR_LEN)

/*
 * Map an RSS hash to a starting slot in the session table.  The mask only
 * works as a modulo if MAX_LRO_SES is a power of two (assumed; defined
 * elsewhere).  The argument is parenthesized so compound expressions such
 * as (a | b) are masked as a whole.
 */
#define LRO_SESSION_IDX_HINT_HASH(hash) ((hash) & (MAX_LRO_SES - 1))

/*
 * Advance idx to the next table slot, wrapping at MAX_LRO_SES.  idx is
 * evaluated more than once and must be a side-effect-free lvalue.  The
 * whole expansion is parenthesized so it can be embedded in a larger
 * expression.
 */
#define LRO_IDX_INC(idx) ((idx) = ((idx) + 1) & (MAX_LRO_SES - 1))
79
80 static __inline int
lro_match(struct mbuf * m,struct ip * ih,struct tcphdr * th)81 lro_match(struct mbuf *m, struct ip *ih, struct tcphdr *th)
82 {
83 struct ip *sih = (struct ip *)(mtod(m, uint8_t *) + IPH_OFFSET);
84 struct tcphdr *sth = (struct tcphdr *) (sih + 1);
85
86 return (th->th_sport == sth->th_sport &&
87 th->th_dport == sth->th_dport &&
88 ih->ip_src.s_addr == sih->ip_src.s_addr &&
89 ih->ip_dst.s_addr == sih->ip_dst.s_addr);
90 }
91
92 static __inline struct t3_lro_session *
lro_lookup(struct lro_state * l,int idx,struct ip * ih,struct tcphdr * th)93 lro_lookup(struct lro_state *l, int idx, struct ip *ih, struct tcphdr *th)
94 {
95 struct t3_lro_session *s = NULL;
96 int active = l->nactive;
97
98 while (active) {
99 s = &l->sess[idx];
100 if (s->head) {
101 if (lro_match(s->head, ih, th))
102 break;
103 active--;
104 }
105 LRO_IDX_INC(idx);
106 }
107
108 return (s);
109 }
110
111 static __inline int
can_lro_packet(struct cpl_rx_pkt * cpl,unsigned int rss_hi)112 can_lro_packet(struct cpl_rx_pkt *cpl, unsigned int rss_hi)
113 {
114 struct ether_header *eh = (struct ether_header *)(cpl + 1);
115 struct ip *ih = (struct ip *)(eh + 1);
116
117 /*
118 * XXX VLAN support?
119 */
120 if (__predict_false(G_HASHTYPE(ntohl(rss_hi)) != RSS_HASH_4_TUPLE ||
121 (*((uint8_t *)cpl + 1) & 0x90) != 0x10 ||
122 cpl->csum != 0xffff || eh->ether_type != ntohs(ETHERTYPE_IP) ||
123 ih->ip_hl != (sizeof (*ih) >> 2))) {
124 return 0;
125 }
126
127 return 1;
128 }
129
130 static int
can_lro_tcpsegment(struct tcphdr * th)131 can_lro_tcpsegment(struct tcphdr *th)
132 {
133 int olen = (th->th_off << 2) - sizeof (*th);
134 u8 control_bits = *((u8 *)th + 13);
135
136 if (__predict_false((control_bits & 0xB7) != 0x10))
137 goto no_lro;
138
139 if (olen) {
140 uint32_t *ptr = (u32 *)(th + 1);
141 if (__predict_false(olen != TCPOLEN_TSTAMP_APPA ||
142 *ptr != ntohl((TCPOPT_NOP << 24) |
143 (TCPOPT_NOP << 16) |
144 (TCPOPT_TIMESTAMP << 8) |
145 TCPOLEN_TIMESTAMP)))
146 goto no_lro;
147 }
148
149 return 1;
150
151 no_lro:
152 return 0;
153 }
154
155 static __inline void
lro_new_session_init(struct t3_lro_session * s,struct mbuf * m)156 lro_new_session_init(struct t3_lro_session *s, struct mbuf *m)
157 {
158 struct ip *ih = (struct ip *)(mtod(m, uint8_t *) + IPH_OFFSET);
159 struct tcphdr *th = (struct tcphdr *) (ih + 1);
160 int ip_len = ntohs(ih->ip_len);
161
162 DPRINTF("%s(s=%p, m=%p)\n", __func__, s, m);
163
164 s->head = m;
165
166 MBUF_HEADER_CHECK(m);
167 s->ip_len = ip_len;
168 s->seq = ntohl(th->th_seq) + ip_len - sizeof(*ih) - (th->th_off << 2);
169
170 }
171
172 static void
lro_flush_session(struct sge_qset * qs,struct t3_lro_session * s,struct mbuf * m)173 lro_flush_session(struct sge_qset *qs, struct t3_lro_session *s, struct mbuf *m)
174 {
175 struct lro_state *l = &qs->lro;
176 struct mbuf *sm = s->head;
177 struct ip *ih = (struct ip *)(mtod(sm, uint8_t *) + IPH_OFFSET);
178
179
180 DPRINTF("%s(qs=%p, s=%p, ", __func__,
181 qs, s);
182
183 if (m)
184 DPRINTF("m=%p)\n", m);
185 else
186 DPRINTF("m=NULL)\n");
187
188 ih->ip_len = htons(s->ip_len);
189 ih->ip_sum = 0;
190 ih->ip_sum = in_cksum_hdr(ih);
191
192 MBUF_HEADER_CHECK(sm);
193
194 sm->m_flags |= M_LRO;
195 t3_rx_eth(qs->port->adapter, &qs->rspq, sm, 2);
196
197 if (m) {
198 s->head = m;
199 lro_new_session_init(s, m);
200 } else {
201 s->head = NULL;
202 l->nactive--;
203 }
204
205 qs->port_stats[SGE_PSTATS_LRO_FLUSHED]++;
206 }
207
208 static __inline struct t3_lro_session *
lro_new_session(struct sge_qset * qs,struct mbuf * m,uint32_t rss_hash)209 lro_new_session(struct sge_qset *qs, struct mbuf *m, uint32_t rss_hash)
210 {
211 struct lro_state *l = &qs->lro;
212 int idx = LRO_SESSION_IDX_HINT_HASH(rss_hash);
213 struct t3_lro_session *s = &l->sess[idx];
214
215 DPRINTF("%s(qs=%p, m=%p, rss_hash=0x%x)\n", __func__,
216 qs, m, rss_hash);
217
218 if (__predict_true(!s->head))
219 goto done;
220
221 if (l->nactive > MAX_LRO_SES)
222 panic("MAX_LRO_PER_QSET exceeded");
223
224 if (l->nactive == MAX_LRO_SES) {
225 lro_flush_session(qs, s, m);
226 qs->port_stats[SGE_PSTATS_LRO_X_STREAMS]++;
227 return s;
228 }
229
230 while (1) {
231 LRO_IDX_INC(idx);
232 s = &l->sess[idx];
233 if (!s->head)
234 break;
235 }
236 done:
237 lro_new_session_init(s, m);
238 l->nactive++;
239
240 return s;
241 }
242
243 static __inline int
lro_update_session(struct t3_lro_session * s,struct mbuf * m)244 lro_update_session(struct t3_lro_session *s, struct mbuf *m)
245 {
246 struct mbuf *sm = s->head;
247 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(sm, uint8_t *) + 2);
248 struct cpl_rx_pkt *ncpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + 2);
249 struct ip *nih = (struct ip *)(mtod(m, uint8_t *) + IPH_OFFSET);
250 struct tcphdr *th, *nth = (struct tcphdr *)(nih + 1);
251 uint32_t seq = ntohl(nth->th_seq);
252 int plen, tcpiphlen, olen = (nth->th_off << 2) - sizeof (*nth);
253
254
255 DPRINTF("%s(s=%p, m=%p)\n", __func__, s, m);
256 if (cpl->vlan_valid && cpl->vlan != ncpl->vlan) {
257 return -1;
258 }
259 if (__predict_false(seq != s->seq)) {
260 DPRINTF("sequence mismatch\n");
261 return -1;
262 }
263
264 MBUF_HEADER_CHECK(sm);
265 th = (struct tcphdr *)(mtod(sm, uint8_t *) + IPH_OFFSET + sizeof (struct ip));
266
267 if (olen) {
268 uint32_t *ptr = (uint32_t *)(th + 1);
269 uint32_t *nptr = (uint32_t *)(nth + 1);
270
271 if (__predict_false(ntohl(*(ptr + 1)) > ntohl(*(nptr + 1)) ||
272 !*(nptr + 2))) {
273 return -1;
274 }
275 *(ptr + 1) = *(nptr + 1);
276 *(ptr + 2) = *(nptr + 2);
277 }
278 th->th_ack = nth->th_ack;
279 th->th_win = nth->th_win;
280
281 tcpiphlen = (nth->th_off << 2) + sizeof (*nih);
282 plen = ntohs(nih->ip_len) - tcpiphlen;
283 s->seq += plen;
284 s->ip_len += plen;
285 sm->m_pkthdr.len += plen;
286
287 /*
288 * XXX FIX ME
289 *
290 *
291 */
292
293 #if 0
294 /* XXX this I *do not* understand */
295 if (plen > skb_shinfo(s->skb)->gso_size)
296 skb_shinfo(s->skb)->gso_size = plen;
297 #endif
298 DPRINTF("m_adj(%d)\n", (int)(IPH_OFFSET + tcpiphlen));
299 m_adj(m, IPH_OFFSET + tcpiphlen);
300 #if 0
301 if (__predict_false(!skb_shinfo(s->skb)->frag_list))
302 skb_shinfo(s->skb)->frag_list = skb;
303
304 #endif
305
306 #if 0
307
308 /*
309 * XXX we really need to be able to
310 * support vectors of buffers in FreeBSD
311 */
312 int nr = skb_shinfo(s->skb)->nr_frags;
313 skb_shinfo(s->skb)->frags[nr].page = frag->page;
314 skb_shinfo(s->skb)->frags[nr].page_offset =
315 frag->page_offset + IPH_OFFSET + tcpiphlen;
316 skb_shinfo(s->skb)->frags[nr].size = plen;
317 skb_shinfo(s->skb)->nr_frags = ++nr;
318
319 #endif
320 return (0);
321 }
322
323 void
t3_rx_eth_lro(adapter_t * adap,struct sge_rspq * rq,struct mbuf * m,int ethpad,uint32_t rss_hash,uint32_t rss_csum,int lro)324 t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
325 int ethpad, uint32_t rss_hash, uint32_t rss_csum, int lro)
326 {
327 struct sge_qset *qs = rspq_to_qset(rq);
328 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
329 struct ether_header *eh = (struct ether_header *)(cpl + 1);
330 struct ip *ih;
331 struct tcphdr *th;
332 struct t3_lro_session *s = NULL;
333
334 if (lro == 0)
335 goto no_lro;
336
337 if (!can_lro_packet(cpl, rss_csum))
338 goto no_lro;
339
340 ih = (struct ip *)(eh + 1);
341 th = (struct tcphdr *)(ih + 1);
342
343 s = lro_lookup(&qs->lro,
344 LRO_SESSION_IDX_HINT_HASH(rss_hash), ih, th);
345
346 if (__predict_false(!can_lro_tcpsegment(th))) {
347 goto no_lro;
348 } else if (__predict_false(!s)) {
349 s = lro_new_session(qs, m, rss_hash);
350 } else {
351 if (lro_update_session(s, m)) {
352 lro_flush_session(qs, s, m);
353 }
354 #ifdef notyet
355 if (__predict_false(s->head->m_pkthdr.len + pi->ifp->if_mtu > 65535)) {
356 lro_flush_session(qs, s, NULL);
357 }
358 #endif
359 }
360
361 qs->port_stats[SGE_PSTATS_LRO_QUEUED]++;
362 return;
363 no_lro:
364 if (s)
365 lro_flush_session(qs, s, NULL);
366
367 if (m->m_len == 0 || m->m_pkthdr.len == 0 || (m->m_flags & M_PKTHDR) == 0)
368 DPRINTF("rx_eth_lro mbuf len=%d pktlen=%d flags=0x%x\n",
369 m->m_len, m->m_pkthdr.len, m->m_flags);
370
371 t3_rx_eth(adap, rq, m, ethpad);
372 }
373
374 void
t3_lro_flush(adapter_t * adap,struct sge_qset * qs,struct lro_state * state)375 t3_lro_flush(adapter_t *adap, struct sge_qset *qs, struct lro_state *state)
376 {
377 unsigned int idx = state->active_idx;
378
379 while (state->nactive) {
380 struct t3_lro_session *s = &state->sess[idx];
381
382 if (s->head)
383 lro_flush_session(qs, s, NULL);
384 LRO_IDX_INC(idx);
385 }
386 }
387