1 /**************************************************************************
2
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: cxgb_sge.c,v 1.4 2016/06/10 13:27:14 ozaki-r Exp $");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/conf.h>
37 #include <sys/bus.h>
38 #include <sys/queue.h>
39 #include <sys/sysctl.h>
40
41 #include <sys/proc.h>
42 #include <sys/sched.h>
43 #include <sys/systm.h>
44
45 #include <netinet/in_systm.h>
46 #include <netinet/in.h>
47 #include <netinet/ip.h>
48 #include <netinet/tcp.h>
49
50 #include <dev/pci/pcireg.h>
51 #include <dev/pci/pcivar.h>
52
53 #ifdef CONFIG_DEFINED
54 #include <cxgb_include.h>
55 #else
56 #include <dev/pci/cxgb/cxgb_include.h>
57 #endif
58
59 uint32_t collapse_free = 0;
60 uint32_t mb_free_vec_free = 0;
61 int txq_fills = 0;
62 int collapse_mbufs = 0;
63 static int bogus_imm = 0;
64 #ifndef DISABLE_MBUF_IOVEC
65 static int recycle_enable = 1;
66 #endif
67
68 #define USE_GTS 0
69
70 #define SGE_RX_SM_BUF_SIZE 1536
71 #define SGE_RX_DROP_THRES 16
72 #define SGE_RX_COPY_THRES 128
73
74 /*
75 * Period of the Tx buffer reclaim timer. This timer does not need to run
76 * frequently as Tx buffers are usually reclaimed by new Tx packets.
77 */
78 #define TX_RECLAIM_PERIOD (hz >> 1)
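/*
 * Illustrative note (not part of the original source): hz is the number of
 * callout ticks per second, so (hz >> 1) makes the reclaim timer fire about
 * every half second regardless of the configured HZ value.
 */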
79
80 /*
81 * work request size in bytes
82 */
83 #define WR_LEN (WR_FLITS * 8)
84
85 /*
86 * Values for sge_txq.flags
87 */
88 enum {
89 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
90 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
91 };
92
93 struct tx_desc {
94 uint64_t flit[TX_DESC_FLITS];
95 } __packed;
96
97 struct rx_desc {
98 uint32_t addr_lo;
99 uint32_t len_gen;
100 uint32_t gen2;
101 uint32_t addr_hi;
102 } __packed;
103
104 struct rsp_desc { /* response queue descriptor */
105 struct rss_header rss_hdr;
106 uint32_t flags;
107 uint32_t len_cq;
108 uint8_t imm_data[47];
109 uint8_t intr_gen;
110 } __packed;
111
112 #define RX_SW_DESC_MAP_CREATED (1 << 0)
113 #define TX_SW_DESC_MAP_CREATED (1 << 1)
114 #define RX_SW_DESC_INUSE (1 << 3)
115 #define TX_SW_DESC_MAPPED (1 << 4)
116
117 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0)
118 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP)
119 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP)
120 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
121
122 struct tx_sw_desc { /* SW state per Tx descriptor */
123 struct mbuf *m;
124 bus_dma_segment_t segs[1];
125 bus_dmamap_t map;
126 int flags;
127 };
128
129 struct rx_sw_desc { /* SW state per Rx descriptor */
130 void *cl;
131 bus_dmamap_t map;
132 int flags;
133 };
134
135 struct txq_state {
136 unsigned int compl;
137 unsigned int gen;
138 unsigned int pidx;
139 };
140
141 /*
142 * Maps a number of flits to the number of Tx descriptors that can hold them.
143 * The formula is
144 *
145 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
146 *
147 * HW allows up to 4 descriptors to be combined into a WR.
148 */
149 static uint8_t flit_desc_map[] = {
150 0,
151 #if SGE_NUM_GENBITS == 1
152 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
153 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
154 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
155 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
156 #elif SGE_NUM_GENBITS == 2
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
159 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
160 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
161 #else
162 # error "SGE_NUM_GENBITS must be 1 or 2"
163 #endif
164 };
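/*
 * Worked example (illustrative only, assuming WR_FLITS == 15 for the
 * two-generation-bit layout): a 16-flit work request needs
 *
 *	desc = 1 + (16 - 2) / (15 - 1) = 2
 *
 * descriptors, which matches flit_desc_map[16] == 2 in the
 * SGE_NUM_GENBITS == 2 table above.
 */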
165
166
167 static int lro_default = 0;
168 int cxgb_debug = 0;
169
170 static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
171 static void sge_timer_cb(void *arg);
172 static void sge_timer_reclaim(struct work *wk, void *arg);
173 static void sge_txq_reclaim_handler(struct work *wk, void *arg);
174 static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec);
175
176 /**
177 * reclaim_completed_tx - reclaims completed Tx descriptors
178 * @q: the Tx queue to reclaim completed descriptors from
179 * @nbufs: the maximum number of reclaimed buffers to return in @mvec
180 *
181 * Reclaims Tx descriptors that the SGE has indicated it has processed,
182 * and frees the associated buffers if possible. Called with the Tx
183 * queue's lock held.
184 */
185 static __inline int
186 reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec)
187 {
188 int reclaimed, reclaim = desc_reclaimable(q);
189 int n = 0;
190
191 mtx_assert(&q->lock, MA_OWNED);
192 if (reclaim > 0) {
193 n = free_tx_desc(q, min(reclaim, nbufs), mvec);
194 reclaimed = min(reclaim, nbufs);
195 q->cleaned += reclaimed;
196 q->in_use -= reclaimed;
197 }
198 return (n);
199 }
200
201 /**
202 * should_restart_tx - are there enough resources to restart a Tx queue?
203 * @q: the Tx queue
204 *
205 * Checks if there are enough descriptors to restart a suspended Tx queue.
206 */
207 static __inline int
208 should_restart_tx(const struct sge_txq *q)
209 {
210 unsigned int r = q->processed - q->cleaned;
211
212 return q->in_use - r < (q->size >> 1);
213 }
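/*
 * Illustrative example (not from the original source): for a 1024-entry Tx
 * queue, should_restart_tx() returns true once fewer than 512 descriptors
 * remain outstanding after subtracting completions that have not yet been
 * reclaimed, i.e. in_use - (processed - cleaned) < size / 2.
 */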
214
215 /**
216 * t3_sge_init - initialize SGE
217 * @adap: the adapter
218 * @p: the SGE parameters
219 *
220 * Performs SGE initialization needed every time after a chip reset.
221 * We do not initialize any of the queue sets here; instead the driver
222 * top-level must request those individually. We also do not enable DMA
223 * here, that should be done after the queues have been set up.
224 */
225 void
226 t3_sge_init(adapter_t *adap, struct sge_params *p)
227 {
228 u_int ctrl, ups;
229
230 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
231
232 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
233 F_CQCRDTCTRL |
234 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
235 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
236 #if SGE_NUM_GENBITS == 1
237 ctrl |= F_EGRGENCTRL;
238 #endif
239 if (adap->params.rev > 0) {
240 if (!(adap->flags & (USING_MSIX | USING_MSI)))
241 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
242 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
243 }
244 t3_write_reg(adap, A_SG_CONTROL, ctrl);
245 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
246 V_LORCQDRBTHRSH(512));
247 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
248 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
249 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
250 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
251 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
252 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
253 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
254 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
255 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
256 }
257
258
259 /**
260 * sgl_len - calculates the size of an SGL of the given capacity
261 * @n: the number of SGL entries
262 *
263 * Calculates the number of flits needed for a scatter/gather list that
264 * can hold the given number of entries.
265 */
266 static __inline unsigned int
267 sgl_len(unsigned int n)
268 {
269 return ((3 * n) / 2 + (n & 1));
270 }
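/*
 * Worked example (illustrative only): each SGL entry carries an 8-byte DMA
 * address and a 4-byte length, and two entries pack into three flits, hence
 * the 3n/2 term. For n == 3 segments this gives (3 * 3) / 2 + (3 & 1) =
 * 4 + 1 = 5 flits.
 */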
271
272 /**
273 * get_imm_packet - return the next ingress packet buffer from a response
274 * @resp: the response descriptor containing the packet data
275 *
276 * Return a packet containing the immediate data of the given response.
277 */
278 #ifdef DISABLE_MBUF_IOVEC
279 static __inline int
280 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh)
281 {
282 struct mbuf *m;
283 int len;
284 uint32_t flags = ntohl(resp->flags);
285 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
286
287 /*
288 * would be a firmware bug
289 */
290 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
291 return (0);
292
293 m = m_gethdr(M_NOWAIT, MT_DATA);
294 len = G_RSPD_LEN(ntohl(resp->len_cq));
295
296 if (m) {
297 MH_ALIGN(m, IMMED_PKT_SIZE);
298 memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE);
299 m->m_len = len;
300
301 switch (sopeop) {
302 case RSPQ_SOP_EOP:
303 mh->mh_head = mh->mh_tail = m;
304 m->m_pkthdr.len = len;
305 m->m_flags |= M_PKTHDR;
306 break;
307 case RSPQ_EOP:
308 m->m_flags &= ~M_PKTHDR;
309 mh->mh_head->m_pkthdr.len += len;
310 mh->mh_tail->m_next = m;
311 mh->mh_tail = m;
312 break;
313 }
314 }
315 return (m != NULL);
316 }
317
318 #else
319 static int
320 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags)
321 {
322 int len, error;
323 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
324
325 /*
326 * would be a firmware bug
327 */
328 len = G_RSPD_LEN(ntohl(resp->len_cq));
329 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) {
330 if (cxgb_debug)
331 device_printf(sc->dev, "unexpected value sopeop=%d flags=0x%x len=%d in get_imm_packet\n", sopeop, flags, len);
332 bogus_imm++;
333 return (EINVAL);
334 }
335 error = 0;
336 switch (sopeop) {
337 case RSPQ_SOP_EOP:
338 m->m_len = m->m_pkthdr.len = len;
339 memcpy(mtod(m, uint8_t *), resp->imm_data, len);
340 break;
341 case RSPQ_EOP:
342 memcpy(cl, resp->imm_data, len);
343 m_iovappend(m, cl, MSIZE, len, 0);
344 break;
345 default:
346 bogus_imm++;
347 error = EINVAL;
348 }
349
350 return (error);
351 }
352 #endif
353
354 static __inline u_int
355 flits_to_desc(u_int n)
356 {
357 return (flit_desc_map[n]);
358 }
359
360 void
361 t3_sge_err_intr_handler(adapter_t *adapter)
362 {
363 unsigned int v, status;
364
365
366 status = t3_read_reg(adapter, A_SG_INT_CAUSE);
367
368 if (status & F_RSPQCREDITOVERFOW)
369 CH_ALERT(adapter, "SGE response queue credit overflow\n");
370
371 if (status & F_RSPQDISABLED) {
372 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
373
374 CH_ALERT(adapter,
375 "packet delivered to disabled response queue (0x%x)\n",
376 (v >> S_RSPQ0DISABLED) & 0xff);
377 }
378
379 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
380 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
381 t3_fatal_err(adapter);
382 }
383
384 void
385 t3_sge_prep(adapter_t *adap, struct sge_params *p)
386 {
387 int i;
388
389 /* XXX Does ETHER_ALIGN need to be accounted for here? */
390 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);
391
392 for (i = 0; i < SGE_QSETS; ++i) {
393 struct qset_params *q = p->qset + i;
394
395 q->polling = adap->params.rev > 0;
396
397 if (adap->params.nports > 2)
398 q->coalesce_nsecs = 50000;
399 else
400 q->coalesce_nsecs = 5000;
401
402 q->rspq_size = RSPQ_Q_SIZE;
403 q->fl_size = FL_Q_SIZE;
404 q->jumbo_size = JUMBO_Q_SIZE;
405 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
406 q->txq_size[TXQ_OFLD] = 1024;
407 q->txq_size[TXQ_CTRL] = 256;
408 q->cong_thres = 0;
409 }
410 }
411
412 int
413 t3_sge_alloc(adapter_t *sc)
414 {
415 /* The parent tag. */
416 sc->parent_dmat = sc->pa.pa_dmat;
417
418 /*
419 * DMA tag for normal sized RX frames
420 */
421 sc->rx_dmat = sc->pa.pa_dmat;
422
423 /*
424 * DMA tag for jumbo sized RX frames.
425 */
426 sc->rx_jumbo_dmat = sc->pa.pa_dmat;
427
428 /*
429 * DMA tag for TX frames.
430 */
431 sc->tx_dmat = sc->pa.pa_dmat;
432
433 return (0);
434 }
435
436 int
437 t3_sge_free(struct adapter * sc)
438 {
439 return (0);
440 }
441
442 void
443 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
444 {
445
446 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
447 qs->rspq.polling = 0 /* p->polling */;
448 }
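/*
 * Illustrative note (assumption, not from the original source): t3_sge_init()
 * programs A_SG_TIMER_TICK to core_ticks_per_usec(adap) / 10, i.e. roughly a
 * 100ns tick, so dividing coalesce_nsecs by 100 converts it into holdoff
 * timer units; a coalesce_nsecs of 5000 yields holdoff_tmr == 50, about 5us
 * of interrupt holdoff.
 */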
449
450 /**
451 * refill_fl - refill an SGE free-buffer list
452 * @sc: the controller softc
453 * @q: the free-list to refill
454 * @n: the number of new buffers to allocate
455 *
456 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
457 * The caller must ensure that @n does not exceed the queue's capacity.
458 */
459 static void
460 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
461 {
462 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
463 struct rx_desc *d = &q->desc[q->pidx];
464 void *cl;
465 int err;
466
467 while (n--) {
468 /*
469 * We only allocate a cluster, mbuf allocation happens after rx
470 */
471 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0)
472 {
473 err = bus_dmamap_create(sc->pa.pa_dmat,
474 q->buf_size, 1, q->buf_size, 0,
475 BUS_DMA_ALLOCNOW, &sd->map);
476 if (err != 0)
477 {
478 log(LOG_WARNING, "failure in refill_fl\n");
479 return;
480 }
481 sd->flags |= RX_SW_DESC_MAP_CREATED;
482 }
483 cl = malloc(q->buf_size, M_DEVBUF, M_NOWAIT);
484 if (cl == NULL)
485 {
486 log(LOG_WARNING, "Failed to allocate cluster\n");
487 break;
488 }
489 err = bus_dmamap_load(sc->pa.pa_dmat, sd->map, cl, q->buf_size, NULL, BUS_DMA_NOWAIT);
490 if (err)
491 {
492 log(LOG_WARNING, "failure in refill_fl\n");
493 free(cl, M_DEVBUF);
494 return;
495 }
496
497 sd->flags |= RX_SW_DESC_INUSE;
498 sd->cl = cl;
499 d->addr_lo = htobe32(sd->map->dm_segs[0].ds_addr & 0xffffffff);
500 d->addr_hi = htobe32(((uint64_t)sd->map->dm_segs[0].ds_addr>>32) & 0xffffffff);
501 d->len_gen = htobe32(V_FLD_GEN1(q->gen));
502 d->gen2 = htobe32(V_FLD_GEN2(q->gen));
503
504 d++;
505 sd++;
506
507 if (++q->pidx == q->size) {
508 q->pidx = 0;
509 q->gen ^= 1;
510 sd = q->sdesc;
511 d = q->desc;
512 }
513 q->credits++;
514 }
515
516 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
517 }
518
519
520 /**
521 * free_rx_bufs - free the Rx buffers on an SGE free list
522 * @sc: the controller softc
523 * @q: the SGE free list to clean up
524 *
525 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
526 * this queue should be stopped before calling this function.
527 */
528 static void
529 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
530 {
531 u_int cidx = q->cidx;
532
533 while (q->credits--) {
534 struct rx_sw_desc *d = &q->sdesc[cidx];
535
536 if (d->flags & RX_SW_DESC_INUSE) {
537 bus_dmamap_unload(q->entry_tag, d->map);
538 bus_dmamap_destroy(q->entry_tag, d->map);
539 d->map = NULL;
540 free(d->cl, M_DEVBUF);
541 d->cl = NULL;
542 }
543 d->cl = NULL;
544 if (++cidx == q->size)
545 cidx = 0;
546 }
547 }
548
549 static __inline void
550 __refill_fl(adapter_t *adap, struct sge_fl *fl)
551 {
552 refill_fl(adap, fl, min(16U, fl->size - fl->credits));
553 }
554
555 #ifndef DISABLE_MBUF_IOVEC
556 /**
557 * recycle_rx_buf - recycle a receive buffer
558 * @adapter: the adapter
559 * @q: the SGE free list
560 * @idx: index of buffer to recycle
561 *
562 * Recycles the specified buffer on the given free list by adding it at
563 * the next available slot on the list.
564 */
565 static void
566 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
567 {
568 struct rx_desc *from = &q->desc[idx];
569 struct rx_desc *to = &q->desc[q->pidx];
570
571 q->sdesc[q->pidx] = q->sdesc[idx];
572 to->addr_lo = from->addr_lo; // already big endian
573 to->addr_hi = from->addr_hi; // likewise
574 wmb();
575 to->len_gen = htobe32(V_FLD_GEN1(q->gen));
576 to->gen2 = htobe32(V_FLD_GEN2(q->gen));
577 q->credits++;
578
579 if (++q->pidx == q->size) {
580 q->pidx = 0;
581 q->gen ^= 1;
582 }
583 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
584 }
585 #endif
586
587 static int
588 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
589 bus_addr_t *phys,
590 void *desc, void *sdesc, bus_dma_tag_t *tag,
591 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
592 {
593 size_t len = nelem * elem_size;
594 void *s = NULL;
595 void *p = NULL;
596 int err;
597 bus_dma_segment_t phys_seg;
598
599 int nsegs;
600
601 *tag = sc->pa.pa_dmat;
602
603 /* allocate wired physical memory for DMA descriptor array */
604 err = bus_dmamem_alloc(*tag, len, PAGE_SIZE, 0, &phys_seg, 1,
605 &nsegs, BUS_DMA_NOWAIT);
606 if (err != 0)
607 {
608 device_printf(sc->dev, "Cannot allocate descriptor memory\n");
609 return (ENOMEM);
610 }
611 *phys = phys_seg.ds_addr;
612
613 /* map physical address to kernel virtual address */
614 err = bus_dmamem_map(*tag, &phys_seg, 1, len, &p,
615 BUS_DMA_NOWAIT|BUS_DMA_COHERENT);
616 if (err != 0)
617 {
618 device_printf(sc->dev, "Cannot map descriptor memory\n");
619 return (ENOMEM);
620 }
621
622 memset(p, 0, len);
623 *(void **)desc = p;
624
625 if (sw_size)
626 {
627 len = nelem * sw_size;
628 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
629 *(void **)sdesc = s;
630 }
631
632 if (parent_entry_tag == NULL)
633 return (0);
634 *entry_tag = sc->pa.pa_dmat;
635
636 return (0);
637 }
638
639 static void
640 sge_slow_intr_handler(struct work *wk, void *arg)
641 {
642 adapter_t *sc = arg;
643
644 t3_slow_intr_handler(sc);
645 }
646
647 /**
648 * sge_timer_cb - perform periodic maintenance of an SGE qset
649 * @arg: the adapter whose SGE queue sets are maintained
650 *
651 * Runs periodically from a timer to perform maintenance of an SGE queue
652 * set. It performs the following tasks:
653 *
654 * a) Cleans up any completed Tx descriptors that may still be pending.
655 * Normal descriptor cleanup happens when new packets are added to a Tx
656 * queue so this timer is relatively infrequent and does any cleanup only
657 * if the Tx queue has not seen any new packets in a while. We make a
658 * best effort attempt to reclaim descriptors, in that we don't wait
659 * around if we cannot get a queue's lock (which most likely is because
660 * someone else is queueing new packets and so will also handle the clean
661 * up). Since control queues use immediate data exclusively we don't
662 * bother cleaning them up here.
663 *
664 * b) Replenishes Rx queues that have run out due to memory shortage.
665 * Normally new Rx buffers are added when existing ones are consumed but
666 * when out of memory a queue can become empty. We try to add only a few
667 * buffers here, the queue will be replenished fully as these new buffers
668 * are used up if memory shortage has subsided.
669 *
670 * c) Return coalesced response queue credits in case a response queue is
671 * starved.
672 *
673 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
674 * fifo overflows and the FW doesn't implement any recovery scheme yet.
675 */
676
677 static void
678 sge_timer_cb(void *arg)
679 {
680 adapter_t *sc = arg;
681 struct port_info *p;
682 struct sge_qset *qs;
683 struct sge_txq *txq;
684 int i, j;
685 int reclaim_eth, reclaim_ofl, refill_rx;
686
687 for (i = 0; i < sc->params.nports; i++)
688 for (j = 0; j < sc->port[i].nqsets; j++) {
689 qs = &sc->sge.qs[i + j];
690 txq = &qs->txq[0];
691 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
692 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
693 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
694 (qs->fl[1].credits < qs->fl[1].size));
695 if (reclaim_eth || reclaim_ofl || refill_rx) {
696 p = &sc->port[i];
697 workqueue_enqueue(p->timer_reclaim_task.wq, &p->timer_reclaim_task.w, NULL);
698 break;
699 }
700 }
701 if (sc->params.nports > 2) {
702 int k;
703
704 for_each_port(sc, k) {
705 struct port_info *pi = &sc->port[k];
706
707 t3_write_reg(sc, A_SG_KDOORBELL,
708 F_SELEGRCNTX |
709 (FW_TUNNEL_SGEEC_START + pi->first_qset));
710 }
711 }
712 if (sc->open_device_map != 0)
713 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
714 }
715
716 /*
717 * This is meant to be a catch-all function to keep sge state private
718 * to sge.c
719 *
720 */
721 int
722 t3_sge_init_adapter(adapter_t *sc)
723 {
724 callout_init(&sc->sge_timer_ch, 0);
725 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
726 sc->slow_intr_task.name = "sge_slow_intr";
727 sc->slow_intr_task.func = sge_slow_intr_handler;
728 sc->slow_intr_task.context = sc;
729 kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &sc->slow_intr_task, NULL, "cxgb_make_task");
730 return (0);
731 }
732
733 int
734 t3_sge_init_port(struct port_info *p)
735 {
736 p->timer_reclaim_task.name = "sge_timer_reclaim";
737 p->timer_reclaim_task.func = sge_timer_reclaim;
738 p->timer_reclaim_task.context = p;
739 kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &p->timer_reclaim_task, NULL, "cxgb_make_task");
740
741 return (0);
742 }
743
744 void
745 t3_sge_deinit_sw(adapter_t *sc)
746 {
747 callout_drain(&sc->sge_timer_ch);
748 }
749
750 /**
751 * refill_rspq - replenish an SGE response queue
752 * @adapter: the adapter
753 * @q: the response queue to replenish
754 * @credits: how many new responses to make available
755 *
756 * Replenishes a response queue by making the supplied number of responses
757 * available to HW.
758 */
759 static __inline void
760 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
761 {
762
763 /* mbufs are allocated on demand when a rspq entry is processed. */
764 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
765 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
766 }
767
768 static __inline void
769 sge_txq_reclaim_(struct sge_txq *txq)
770 {
771 int reclaimable, i, n;
772 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
773 struct port_info *p;
774
775 p = txq->port;
776 reclaim_more:
777 n = 0;
778 reclaimable = desc_reclaimable(txq);
779 if (reclaimable > 0 && mtx_trylock(&txq->lock)) {
780 n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec);
781 mtx_unlock(&txq->lock);
782 }
783 if (n == 0)
784 return;
785
786 for (i = 0; i < n; i++) {
787 m_freem_vec(m_vec[i]);
788 }
789 if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
790 txq->size - txq->in_use >= TX_START_MAX_DESC) {
791 txq_fills++;
792 p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
793 workqueue_enqueue(p->start_task.wq, &p->start_task.w, NULL);
794 }
795
796 if (n)
797 goto reclaim_more;
798 }
799
800 static void
801 sge_txq_reclaim_handler(struct work *wk, void *arg)
802 {
803 struct sge_txq *q = arg;
804
805 sge_txq_reclaim_(q);
806 }
807
808 static void
809 sge_timer_reclaim(struct work *wk, void *arg)
810 {
811 struct port_info *p = arg;
812 int i, nqsets = p->nqsets;
813 adapter_t *sc = p->adapter;
814 struct sge_qset *qs;
815 struct sge_txq *txq;
816 struct mtx *lock;
817
818 for (i = 0; i < nqsets; i++) {
819 qs = &sc->sge.qs[i];
820 txq = &qs->txq[TXQ_ETH];
821 sge_txq_reclaim_(txq);
822
823 txq = &qs->txq[TXQ_OFLD];
824 sge_txq_reclaim_(txq);
825
826 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
827 &sc->sge.qs[0].rspq.lock;
828
829 if (mtx_trylock(lock)) {
830 /* XXX currently assume that we are *NOT* polling */
831 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
832
833 if (qs->fl[0].credits < qs->fl[0].size - 16)
834 __refill_fl(sc, &qs->fl[0]);
835 if (qs->fl[1].credits < qs->fl[1].size - 16)
836 __refill_fl(sc, &qs->fl[1]);
837
838 if (status & (1 << qs->rspq.cntxt_id)) {
839 if (qs->rspq.credits) {
840 refill_rspq(sc, &qs->rspq, 1);
841 qs->rspq.credits--;
842 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
843 1 << qs->rspq.cntxt_id);
844 }
845 }
846 mtx_unlock(lock);
847 }
848 }
849 }
850
851 /**
852 * init_qset_cntxt - initialize an SGE queue set context info
853 * @qs: the queue set
854 * @id: the queue set id
855 *
856 * Initializes the TIDs and context ids for the queues of a queue set.
857 */
858 static void
859 init_qset_cntxt(struct sge_qset *qs, u_int id)
860 {
861
862 qs->rspq.cntxt_id = id;
863 qs->fl[0].cntxt_id = 2 * id;
864 qs->fl[1].cntxt_id = 2 * id + 1;
865 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
866 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
867 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
868 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
869 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
870 }
871
872
873 static void
874 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
875 {
876 txq->in_use += ndesc;
877 /*
878 * XXX we don't handle stopping of queue
879 * presumably start handles this when we bump against the end
880 */
881 txqs->gen = txq->gen;
882 txq->unacked += ndesc;
883 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
884 txq->unacked &= 7;
885 txqs->pidx = txq->pidx;
886 txq->pidx += ndesc;
887
888 if (txq->pidx >= txq->size) {
889 txq->pidx -= txq->size;
890 txq->gen ^= 1;
891 }
892
893 }
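/*
 * Worked example (illustrative only): txq->unacked keeps only its low three
 * bits after each call, so once the running count of posted descriptors
 * reaches 8 the (unacked & 8) term becomes non-zero and is shifted into the
 * WR completion-request position; the hardware is therefore asked to report
 * Tx completions roughly every 8 descriptors instead of per packet.
 */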
894
895 /**
896 * calc_tx_descs - calculate the number of Tx descriptors for a packet
897 * @m: the packet mbufs
898 * @nsegs: the number of segments
899 *
900 * Returns the number of Tx descriptors needed for the given Ethernet
901 * packet. Ethernet packets require addition of WR and CPL headers.
902 */
903 static __inline unsigned int
904 calc_tx_descs(const struct mbuf *m, int nsegs)
905 {
906 unsigned int flits;
907
908 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
909 return 1;
910
911 flits = sgl_len(nsegs) + 2;
912 #ifdef TSO_SUPPORTED
913 if (m->m_pkthdr.csum_flags & (CSUM_TSO))
914 flits++;
915 #endif
916 return flits_to_desc(flits);
917 }
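/*
 * Worked example (illustrative only): a non-TSO packet mapped to 3 DMA
 * segments needs flits = sgl_len(3) + 2 = 7 (two flits of WR + CPL header
 * plus the SGL), and flits_to_desc(7) == 1, so it still fits in a single
 * Tx descriptor.
 */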
918
919 static unsigned int
920 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
921 struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
922 {
923 struct mbuf *m0;
924 int err, pktlen;
925 int i, total_len;
926
927 m0 = *m;
928 pktlen = m0->m_pkthdr.len;
929
930 m0 = *m;
931 i = 0;
932 total_len = 0;
933 while (m0)
934 {
935 i++;
936 total_len += m0->m_len;
937 m0 = m0->m_next;
938 }
939 err = bus_dmamap_create(txq->entry_tag, total_len, TX_MAX_SEGS, total_len, 0, BUS_DMA_NOWAIT, &stx->map);
940 if (err)
941 return (err);
942 err = bus_dmamap_load_mbuf(txq->entry_tag, stx->map, *m, 0);
943 if (err)
944 return (err);
945 // feed out the physical mappings
946 *nsegs = stx->map->dm_nsegs;
947 for (i=0; i<*nsegs; i++)
948 {
949 segs[i] = stx->map->dm_segs[i];
950 }
951 #ifdef DEBUG
952 if (err) {
953 int n = 0;
954 struct mbuf *mtmp = m0;
955 while(mtmp) {
956 n++;
957 mtmp = mtmp->m_next;
958 }
959 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
960 err, m0->m_pkthdr.len, n);
961 }
962 #endif
963 if (err == EFBIG) {
964 /* Too many segments, try to defrag */
965 m0 = m_defrag(m0, M_DONTWAIT);
966 if (m0 == NULL) {
967 m_freem(*m);
968 *m = NULL;
969 return (ENOBUFS);
970 }
971 *m = m0;
972 INT3; // XXXXXXXXXXXXXXXXXX like above!
973 }
974
975 if (err == ENOMEM) {
976 return (err);
977 }
978
979 if (err) {
980 if (cxgb_debug)
981 printf("map failure err=%d pktlen=%d\n", err, pktlen);
982 m_freem_vec(m0);
983 *m = NULL;
984 return (err);
985 }
986
987 bus_dmamap_sync(txq->entry_tag, stx->map, 0, pktlen, BUS_DMASYNC_PREWRITE);
988 stx->flags |= TX_SW_DESC_MAPPED;
989
990 return (0);
991 }
992
993 /**
994 * make_sgl - populate a scatter/gather list for a packet
995 * @sgp: the SGL to populate
996 * @segs: the packet dma segments
997 * @nsegs: the number of segments
998 *
999 * Generates a scatter/gather list for the buffers that make up a packet.
1000 * The SGL occupies sgl_len(@nsegs) 8-byte words; the caller must size the
1001 * SGL appropriately.
1002 */
1003 static __inline void
1004 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1005 {
1006 int i, idx;
1007
1008 for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
1009 if (i && idx == 0)
1010 ++sgp;
1011
1012 sgp->len[idx] = htobe32(segs[i].ds_len);
1013 sgp->addr[idx] = htobe64(segs[i].ds_addr);
1014 }
1015
1016 if (idx)
1017 sgp->len[idx] = 0;
1018 }
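/*
 * Illustrative walk-through (not from the original source): for nsegs == 3
 * the loop fills len[0]/addr[0] and len[1]/addr[1] of the first sg_ent,
 * then len[0]/addr[0] of the second, and the trailing assignment zeroes the
 * unused len[1] of the second entry, for an SGL of sgl_len(3) == 5 flits.
 */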
1019
1020 /**
1021 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1022 * @adap: the adapter
1023 * @q: the Tx queue
1024 *
1025 * Ring the doorbell if a Tx queue is asleep. There is a natural race,
1026 * where the HW is going to sleep just after we checked; in that case the
1027 * interrupt handler will detect the outstanding TX packet and ring the
1028 * doorbell for us.
1029 *
1030 * When GTS is disabled we unconditionally ring the doorbell.
1031 */
1032 static __inline void
1033 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1034 {
1035 #if USE_GTS
1036 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1037 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1038 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1039 #ifdef T3_TRACE
1040 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1041 q->cntxt_id);
1042 #endif
1043 t3_write_reg(adap, A_SG_KDOORBELL,
1044 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1045 }
1046 #else
1047 wmb(); /* write descriptors before telling HW */
1048 t3_write_reg(adap, A_SG_KDOORBELL,
1049 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1050 #endif
1051 }
1052
1053 static __inline void
1054 wr_gen2(struct tx_desc *d, unsigned int gen)
1055 {
1056 #if SGE_NUM_GENBITS == 2
1057 d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1058 #endif
1059 }
1060
1061
1062
1063 /**
1064 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
1065 * @ndesc: number of Tx descriptors spanned by the SGL
1066 * @txd: first Tx descriptor to be written
1067 * @txqs: txq state (generation and producer index)
1068 * @txq: the SGE Tx queue
1069 * @sgl: the SGL
1070 * @flits: number of flits to the start of the SGL in the first descriptor
1071 * @sgl_flits: the SGL size in flits
1072 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
1073 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
1074 *
1075 * Write a work request header and an associated SGL. If the SGL is
1076 * small enough to fit into one Tx descriptor it has already been written
1077 * and we just need to write the WR header. Otherwise we distribute the
1078 * SGL across the number of descriptors it spans.
1079 */
1080
1081 static void
1082 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1083 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1084 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1085 {
1086
1087 struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1088 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1089
1090 if (__predict_true(ndesc == 1)) {
1091 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1092 V_WR_SGLSFLT(flits)) | wr_hi;
1093 wmb();
1094 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1095 V_WR_GEN(txqs->gen)) | wr_lo;
1096 /* XXX gen? */
1097 wr_gen2(txd, txqs->gen);
1098 } else {
1099 unsigned int ogen = txqs->gen;
1100 const uint64_t *fp = (const uint64_t *)sgl;
1101 struct work_request_hdr *wp = wrp;
1102
1103 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1104 V_WR_SGLSFLT(flits)) | wr_hi;
1105
1106 while (sgl_flits) {
1107 unsigned int avail = WR_FLITS - flits;
1108
1109 if (avail > sgl_flits)
1110 avail = sgl_flits;
1111 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1112 sgl_flits -= avail;
1113 ndesc--;
1114 if (!sgl_flits)
1115 break;
1116
1117 fp += avail;
1118 txd++;
1119 txsd++;
1120 if (++txqs->pidx == txq->size) {
1121 txqs->pidx = 0;
1122 txqs->gen ^= 1;
1123 txd = txq->desc;
1124 txsd = txq->sdesc;
1125 }
1126
1127 /*
1128 * when the head of the mbuf chain
1129 * is freed all clusters will be freed
1130 * with it
1131 */
1132 txsd->m = NULL;
1133 wrp = (struct work_request_hdr *)txd;
1134 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1135 V_WR_SGLSFLT(1)) | wr_hi;
1136 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1137 sgl_flits + 1)) |
1138 V_WR_GEN(txqs->gen)) | wr_lo;
1139 wr_gen2(txd, txqs->gen);
1140 flits = 1;
1141 }
1142 wrp->wr_hi |= htonl(F_WR_EOP);
1143 wmb();
1144 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1145 wr_gen2((struct tx_desc *)wp, ogen);
1146 }
1147 }
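/*
 * Illustrative note (assumption, not from the original source): in the
 * multi-descriptor branch each descriptor after the first reserves flit 0
 * for its own work request header, leaving WR_FLITS - 1 flits of SGL per
 * descriptor; this is the same packing assumption behind the
 * desc = 1 + (flits - 2) / (WR_FLITS - 1) formula used by flit_desc_map[].
 */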
1148
1149
1150 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1151 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1152
1153 int
1154 t3_encap(struct port_info *p, struct mbuf **m, int *free_it)
1155 {
1156 adapter_t *sc;
1157 struct mbuf *m0;
1158 struct sge_qset *qs;
1159 struct sge_txq *txq;
1160 struct tx_sw_desc *stx;
1161 struct txq_state txqs;
1162 unsigned int ndesc, flits, cntrl, mlen;
1163 int err, nsegs, tso_info = 0;
1164
1165 struct work_request_hdr *wrp;
1166 struct tx_sw_desc *txsd;
1167 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1168 bus_dma_segment_t segs[TX_MAX_SEGS];
1169 uint32_t wr_hi, wr_lo, sgl_flits;
1170
1171 struct tx_desc *txd;
1172 struct cpl_tx_pkt *cpl;
1173
1174 m0 = *m;
1175 sc = p->adapter;
1176
1177 DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset);
1178
1179 /* port_id=1 qsid=1 txpkt_intf=2 tx_chan=0 */
1180
1181 qs = &sc->sge.qs[p->first_qset];
1182
1183 txq = &qs->txq[TXQ_ETH];
1184 stx = &txq->sdesc[txq->pidx];
1185 txd = &txq->desc[txq->pidx];
1186 cpl = (struct cpl_tx_pkt *)txd;
1187 mlen = m0->m_pkthdr.len;
1188 cpl->len = htonl(mlen | 0x80000000);
1189
1190 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", mlen, p->txpkt_intf, p->tx_chan);
1191 /*
1192 * XXX handle checksum, TSO, and VLAN here
1193 *
1194 */
1195 cntrl = V_TXPKT_INTF(p->txpkt_intf);
1196
1197 /*
1198 * XXX need to add VLAN support for 6.x
1199 */
1200 #ifdef VLAN_SUPPORTED
1201 if (m0->m_flags & M_VLANTAG)
1202 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
1203 if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1204 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1205 #endif
1206 if (tso_info) {
1207 int eth_type;
1208 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
1209 struct ip *ip;
1210 struct tcphdr *tcp;
1211 char *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */
1212
1213 txd->flit[2] = 0;
1214 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1215 hdr->cntrl = htonl(cntrl);
1216
1217 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1218 pkthdr = &tmp[0];
1219 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
1220 } else {
1221 pkthdr = mtod(m0, char *);
1222 }
1223
1224 #ifdef VLAN_SUPPORTED
1225 if (__predict_false(m0->m_flags & M_VLANTAG)) {
1226 eth_type = CPL_ETH_II_VLAN;
1227 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1228 ETHER_VLAN_ENCAP_LEN);
1229 } else {
1230 eth_type = CPL_ETH_II;
1231 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1232 }
1233 #else
1234 eth_type = CPL_ETH_II;
1235 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1236 #endif
1237 tcp = (struct tcphdr *)((uint8_t *)ip +
1238 sizeof(*ip));
1239
1240 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1241 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1242 V_LSO_TCPHDR_WORDS(tcp->th_off);
1243 hdr->lso_info = htonl(tso_info);
1244 flits = 3;
1245 } else {
1246 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1247 cpl->cntrl = htonl(cntrl);
1248
1249 if (mlen <= WR_LEN - sizeof(*cpl)) {
1250 txq_prod(txq, 1, &txqs);
1251 txq->sdesc[txqs.pidx].m = NULL;
1252
1253 if (m0->m_len == m0->m_pkthdr.len)
1254 memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
1255 else
1256 m_copydata(m0, 0, mlen, (void *)&txd->flit[2]);
1257
1258 *free_it = 1;
1259 flits = (mlen + 7) / 8 + 2;
1260 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1261 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1262 F_WR_SOP | F_WR_EOP | txqs.compl);
1263 wmb();
1264 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1265 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1266
1267 wr_gen2(txd, txqs.gen);
1268 check_ring_tx_db(sc, txq);
1269 return (0);
1270 }
1271 flits = 2;
1272 }
1273
1274 wrp = (struct work_request_hdr *)txd;
1275
1276 if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
1277 return (err);
1278 }
1279 m0 = *m;
1280 ndesc = calc_tx_descs(m0, nsegs);
1281
1282 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1283 make_sgl(sgp, segs, nsegs);
1284
1285 sgl_flits = sgl_len(nsegs);
1286
1287 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1288 txq_prod(txq, ndesc, &txqs);
1289 txsd = &txq->sdesc[txqs.pidx];
1290 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1291 wr_lo = htonl(V_WR_TID(txq->token));
1292 txsd->m = m0;
1293 m_set_priority(m0, txqs.pidx);
1294
1295 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1296 check_ring_tx_db(p->adapter, txq);
1297
1298 return (0);
1299 }
1300
1301
1302 /**
1303 * write_imm - write a packet into a Tx descriptor as immediate data
1304 * @d: the Tx descriptor to write
1305 * @m: the packet
1306 * @len: the length of packet data to write as immediate data
1307 * @gen: the generation bit value to write
1308 *
1309 * Writes a packet as immediate data into a Tx descriptor. The packet
1310 * contains a work request at its beginning. We must write the packet
1311 * carefully so the SGE doesn't read accidentally before it's written in
1312 * its entirety.
1313 */
1314 static __inline void
1315 write_imm(struct tx_desc *d, struct mbuf *m,
1316 unsigned int len, unsigned int gen)
1317 {
1318 struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1319 struct work_request_hdr *to = (struct work_request_hdr *)d;
1320
1321 memcpy(&to[1], &from[1], len - sizeof(*from));
1322 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1323 V_WR_BCNTLFLT(len & 7));
1324 wmb();
1325 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1326 V_WR_LEN((len + 7) / 8));
1327 wr_gen2(d, gen);
1328 m_freem(m);
1329 }
1330
1331 /**
1332 * check_desc_avail - check descriptor availability on a send queue
1333 * @adap: the adapter
1334 * @q: the TX queue
1335 * @m: the packet needing the descriptors
1336 * @ndesc: the number of Tx descriptors needed
1337 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1338 *
1339 * Checks if the requested number of Tx descriptors is available on an
1340 * SGE send queue. If the queue is already suspended or not enough
1341 * descriptors are available the packet is queued for later transmission.
1342 * Must be called with the Tx queue locked.
1343 *
1344 * Returns 0 if enough descriptors are available, 1 if there aren't
1345 * enough descriptors and the packet has been queued, and 2 if the caller
1346 * needs to retry because there weren't enough descriptors at the
1347 * beginning of the call but some freed up in the mean time.
1348 */
1349 static __inline int
1350 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1351 struct mbuf *m, unsigned int ndesc,
1352 unsigned int qid)
1353 {
1354 /*
1355 * XXX We currently only use this for checking the control queue;
1356 * the control queue is only used for binding qsets, which happens
1357 * at init time, so we are guaranteed enough descriptors.
1358 */
1359 if (__predict_false(!mbufq_empty(&q->sendq))) {
1360 addq_exit: mbufq_tail(&q->sendq, m);
1361 return 1;
1362 }
1363 if (__predict_false(q->size - q->in_use < ndesc)) {
1364
1365 struct sge_qset *qs = txq_to_qset(q, qid);
1366
1367 setbit(&qs->txq_stopped, qid);
1368 smp_mb();
1369
1370 if (should_restart_tx(q) &&
1371 test_and_clear_bit(qid, &qs->txq_stopped))
1372 return 2;
1373
1374 q->stops++;
1375 goto addq_exit;
1376 }
1377 return 0;
1378 }
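/*
 * Typical caller pattern (illustrative sketch only, mirroring ctrl_xmit()
 * and ofld_xmit() below):
 *
 *	again:	reclaim_completed_tx_imm(q);
 *		ret = check_desc_avail(adap, q, m, ndesc, TXQ_CTRL);
 *		if (ret == 2)
 *			goto again;	(some descriptors freed up, retry)
 *		if (ret == 1)
 *			return ...;	(packet was parked on q->sendq)
 *		write the descriptor(s) and ring the doorbell (ret == 0)
 */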
1379
1380
1381 /**
1382 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1383 * @q: the SGE control Tx queue
1384 *
1385 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1386 * that send only immediate data (presently just the control queues) and
1387 * thus do not have any mbufs
1388 */
1389 static __inline void
1390 reclaim_completed_tx_imm(struct sge_txq *q)
1391 {
1392 unsigned int reclaim = q->processed - q->cleaned;
1393
1394 mtx_assert(&q->lock, MA_OWNED);
1395
1396 q->in_use -= reclaim;
1397 q->cleaned += reclaim;
1398 }
1399
1400 static __inline int
1401 immediate(const struct mbuf *m)
1402 {
1403 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ;
1404 }
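/*
 * Illustrative note (not from the original source): WR_LEN is WR_FLITS * 8
 * bytes, so a packet qualifies as immediate data only when its first buffer
 * and its total length both fit within a single work request; such packets
 * are copied into the Tx descriptor by write_imm() rather than DMA-mapped.
 */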
1405
1406 /**
1407 * ctrl_xmit - send a packet through an SGE control Tx queue
1408 * @adap: the adapter
1409 * @q: the control queue
1410 * @m: the packet
1411 *
1412 * Send a packet through an SGE control Tx queue. Packets sent through
1413 * a control queue must fit entirely as immediate data in a single Tx
1414 * descriptor and have no page fragments.
1415 */
1416 static int
1417 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1418 {
1419 int ret;
1420 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1421
1422 if (__predict_false(!immediate(m))) {
1423 m_freem(m);
1424 return 0;
1425 }
1426
1427 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1428 wrp->wr_lo = htonl(V_WR_TID(q->token));
1429
1430 mtx_lock(&q->lock);
1431 again: reclaim_completed_tx_imm(q);
1432
1433 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1434 if (__predict_false(ret)) {
1435 if (ret == 1) {
1436 mtx_unlock(&q->lock);
1437 return (-1);
1438 }
1439 goto again;
1440 }
1441
1442 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1443
1444 q->in_use++;
1445 if (++q->pidx >= q->size) {
1446 q->pidx = 0;
1447 q->gen ^= 1;
1448 }
1449 mtx_unlock(&q->lock);
1450 wmb();
1451 t3_write_reg(adap, A_SG_KDOORBELL,
1452 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1453 return (0);
1454 }
1455
1456
1457 /**
1458 * restart_ctrlq - restart a suspended control queue
1459 * @qs: the queue set containing the control queue
1460 *
1461 * Resumes transmission on a suspended Tx control queue.
1462 */
1463 static void
1464 restart_ctrlq(struct work *wk, void *data)
1465 {
1466 struct mbuf *m;
1467 struct sge_qset *qs = (struct sge_qset *)data;
1468 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1469 adapter_t *adap = qs->port->adapter;
1470
1471 mtx_lock(&q->lock);
1472 again: reclaim_completed_tx_imm(q);
1473
1474 while (q->in_use < q->size &&
1475 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1476
1477 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1478
1479 if (++q->pidx >= q->size) {
1480 q->pidx = 0;
1481 q->gen ^= 1;
1482 }
1483 q->in_use++;
1484 }
1485 if (!mbufq_empty(&q->sendq)) {
1486 setbit(&qs->txq_stopped, TXQ_CTRL);
1487 smp_mb();
1488
1489 if (should_restart_tx(q) &&
1490 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1491 goto again;
1492 q->stops++;
1493 }
1494 mtx_unlock(&q->lock);
1495 t3_write_reg(adap, A_SG_KDOORBELL,
1496 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1497 }
1498
1499
1500 /*
1501 * Send a management message through control queue 0
1502 */
1503 int
1504 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1505 {
1506 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1507 }
1508
1509 /**
1510 * free_qset - free the resources of an SGE queue set
1511 * @sc: the controller owning the queue set
1512 * @q: the queue set
1513 *
1514 * Release the HW and SW resources associated with an SGE queue set, such
1515 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
1516 * queue set must be quiesced prior to calling this.
1517 */
1518 static void
1519 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1520 {
1521 int i;
1522
1523 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1524 if (q->fl[i].desc) {
1525 mtx_lock(&sc->sge.reg_lock);
1526 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1527 mtx_unlock(&sc->sge.reg_lock);
1528 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1529 INT3;
1530 // bus_dmamem_free(q->fl[i].desc_tag, &q->fl[i].phys_addr, 1);
1531 // XXXXXXXXXXX destroy DMA tags????
1532 }
1533 if (q->fl[i].sdesc) {
1534 free_rx_bufs(sc, &q->fl[i]);
1535 free(q->fl[i].sdesc, M_DEVBUF);
1536 }
1537 }
1538
1539 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1540 if (q->txq[i].desc) {
1541 mtx_lock(&sc->sge.reg_lock);
1542 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1543 mtx_unlock(&sc->sge.reg_lock);
1544 bus_dmamap_unload(q->txq[i].desc_tag,
1545 q->txq[i].desc_map);
1546 INT3;
1547 // bus_dmamem_free(q->txq[i].desc_tag, &q->txq[i].phys_addr, 1);
1548 // XXXXXXXXXXX destroy DMA tags???? And the lock?!??!
1549
1550 }
1551 if (q->txq[i].sdesc) {
1552 free(q->txq[i].sdesc, M_DEVBUF);
1553 }
1554 }
1555
1556 if (q->rspq.desc) {
1557 mtx_lock(&sc->sge.reg_lock);
1558 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1559 mtx_unlock(&sc->sge.reg_lock);
1560
1561 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1562 INT3;
1563 // bus_dmamem_free(q->rspq.desc_tag, &q->rspq.phys_addr, 1);
1564 // XXXXXXXXXXX destroy DMA tags???? and the LOCK ?!?!?
1565 }
1566
1567 memset(q, 0, sizeof(*q));
1568 }
1569
1570 /**
1571 * t3_free_sge_resources - free SGE resources
1572 * @sc: the adapter softc
1573 *
1574 * Frees resources used by the SGE queue sets.
1575 */
1576 void
1577 t3_free_sge_resources(adapter_t *sc)
1578 {
1579 int i, nqsets;
1580
1581 for (nqsets = i = 0; i < (sc)->params.nports; i++)
1582 nqsets += sc->port[i].nqsets;
1583
1584 for (i = 0; i < nqsets; ++i)
1585 t3_free_qset(sc, &sc->sge.qs[i]);
1586 }
1587
1588 /**
1589 * t3_sge_start - enable SGE
1590 * @sc: the controller softc
1591 *
1592 * Enables the SGE for DMAs. This is the last step in starting packet
1593 * transfers.
1594 */
1595 void
1596 t3_sge_start(adapter_t *sc)
1597 {
1598 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1599 }
1600
1601 /**
1602 * t3_sge_stop - disable SGE operation
1603 * @sc: the adapter
1604 *
1605 * Disables the DMA engine. This can be called in emergencies (e.g.,
1606 * from error interrupts) or from normal process context. In the latter
1607 * case it also disables any pending queue restart tasklets. Note that
1608 * if it is called in interrupt context it cannot disable the restart
1609 * tasklets as it cannot wait; however, the tasklets will have no effect
1610 * since the doorbells are disabled and the driver will call this again
1611 * later from process context, at which time the tasklets will be stopped
1612 * if they are still running.
1613 */
1614 void
1615 t3_sge_stop(adapter_t *sc)
1616 {
1617 int i, nqsets;
1618
1619 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1620
1621 for (nqsets = i = 0; i < (sc)->params.nports; i++)
1622 nqsets += sc->port[i].nqsets;
1623
1624 for (i = 0; i < nqsets; ++i) {
1625 }
1626 }
1627
1628
1629 /**
1630 * free_tx_desc - reclaims Tx descriptors and their buffers
1631 * @q: the Tx queue to reclaim descriptors from
1632 * @n: the number of descriptors to reclaim
1633 * @m_vec: an array in which to return the freed mbufs
1634 *
1635 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1636 * Tx buffers. Called with the Tx queue lock held.
1637 */
1638 int
1639 free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec)
1640 {
1641 struct tx_sw_desc *d;
1642 unsigned int cidx = q->cidx;
1643 int nbufs = 0;
1644
1645 #ifdef T3_TRACE
1646 T3_TRACE2(sc->tb[q->cntxt_id & 7],
1647 "reclaiming %u Tx descriptors at cidx %u", n, cidx);
1648 #endif
1649 d = &q->sdesc[cidx];
1650
1651 while (n-- > 0) {
1652 DPRINTF("cidx=%d d=%p\n", cidx, d);
1653 if (d->m) {
1654 if (d->flags & TX_SW_DESC_MAPPED) {
1655 bus_dmamap_unload(q->entry_tag, d->map);
1656 bus_dmamap_destroy(q->entry_tag, d->map);
1657 d->flags &= ~TX_SW_DESC_MAPPED;
1658 }
1659 if (m_get_priority(d->m) == cidx) {
1660 m_vec[nbufs] = d->m;
1661 d->m = NULL;
1662 nbufs++;
1663 } else {
1664 printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
1665 }
1666 }
1667 ++d;
1668 if (++cidx == q->size) {
1669 cidx = 0;
1670 d = q->sdesc;
1671 }
1672 }
1673 q->cidx = cidx;
1674
1675 return (nbufs);
1676 }
1677
1678 /**
1679 * is_new_response - check if a response is newly written
1680 * @r: the response descriptor
1681 * @q: the response queue
1682 *
1683 * Returns true if a response descriptor contains a yet unprocessed
1684 * response.
1685 */
1686 static __inline int
1687 is_new_response(const struct rsp_desc *r,
1688 const struct sge_rspq *q)
1689 {
1690 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1691 }
1692
1693 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1694 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1695 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1696 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1697 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1698
1699 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1700 #define NOMEM_INTR_DELAY 2500
1701
1702 /**
1703 * write_ofld_wr - write an offload work request
1704 * @adap: the adapter
1705 * @m: the packet to send
1706 * @q: the Tx queue
1707 * @pidx: index of the first Tx descriptor to write
1708 * @gen: the generation value to use
1709 * @ndesc: number of descriptors the packet will occupy
1710 *
1711 * Write an offload work request to send the supplied packet. The packet
1712 * data already carry the work request with most fields populated.
1713 */
1714 static void
1715 write_ofld_wr(adapter_t *adap, struct mbuf *m,
1716 struct sge_txq *q, unsigned int pidx,
1717 unsigned int gen, unsigned int ndesc,
1718 bus_dma_segment_t *segs, unsigned int nsegs)
1719 {
1720 unsigned int sgl_flits, flits;
1721 struct work_request_hdr *from;
1722 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1723 struct tx_desc *d = &q->desc[pidx];
1724 struct txq_state txqs;
1725
1726 if (immediate(m)) {
1727 q->sdesc[pidx].m = NULL;
1728 write_imm(d, m, m->m_len, gen);
1729 return;
1730 }
1731
1732 /* Only TX_DATA builds SGLs */
1733
1734 from = mtod(m, struct work_request_hdr *);
1735 INT3; /// DEBUG this???
1736 flits = 3; // XXXXXXXXXXXXXX
1737
1738 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1739
1740 make_sgl(sgp, segs, nsegs);
1741 sgl_flits = sgl_len(nsegs);
1742
1743 txqs.gen = q->gen;
1744 txqs.pidx = q->pidx;
1745 txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1746 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1747 from->wr_hi, from->wr_lo);
1748 }
1749
1750 /**
1751 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1752 * @m: the packet
1753 *
1754 * Returns the number of Tx descriptors needed for the given offload
1755 * packet. These packets are already fully constructed.
1756 */
1757 static __inline unsigned int
1758 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
1759 {
1760 unsigned int flits, cnt = 0;
1761
1762
1763 if (m->m_len <= WR_LEN)
1764 return 1; /* packet fits as immediate data */
1765
1766 if (m->m_flags & M_IOVEC)
1767 cnt = mtomv(m)->mv_count;
1768
1769 INT3; // Debug this????
1770 flits = 3; // XXXXXXXXX
1771
1772 return flits_to_desc(flits + sgl_len(cnt));
1773 }
1774
1775 /**
1776 * ofld_xmit - send a packet through an offload queue
1777 * @adap: the adapter
1778 * @q: the Tx offload queue
1779 * @m: the packet
1780 *
1781 * Send an offload packet through an SGE offload queue.
1782 */
1783 static int
1784 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1785 {
1786 int ret, nsegs;
1787 unsigned int ndesc;
1788 unsigned int pidx, gen;
1789 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
1790 bus_dma_segment_t segs[TX_MAX_SEGS];
1791 int i, cleaned;
1792 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
1793
1794 mtx_lock(&q->lock);
1795 if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
1796 mtx_unlock(&q->lock);
1797 return (ret);
1798 }
1799 ndesc = calc_tx_descs_ofld(m, nsegs);
1800 again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
1801
1802 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
1803 if (__predict_false(ret)) {
1804 if (ret == 1) {
1805 m_set_priority(m, ndesc); /* save for restart */
1806 mtx_unlock(&q->lock);
1807 return EINTR;
1808 }
1809 goto again;
1810 }
1811
1812 gen = q->gen;
1813 q->in_use += ndesc;
1814 pidx = q->pidx;
1815 q->pidx += ndesc;
1816 if (q->pidx >= q->size) {
1817 q->pidx -= q->size;
1818 q->gen ^= 1;
1819 }
1820 #ifdef T3_TRACE
1821 T3_TRACE5(adap->tb[q->cntxt_id & 7],
1822 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
1823 ndesc, pidx, skb->len, skb->len - skb->data_len,
1824 skb_shinfo(skb)->nr_frags);
1825 #endif
1826 mtx_unlock(&q->lock);
1827
1828 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
1829 check_ring_tx_db(adap, q);
1830
1831 for (i = 0; i < cleaned; i++) {
1832 m_freem_vec(m_vec[i]);
1833 }
1834 return (0);
1835 }
1836
1837 /**
1838 * restart_offloadq - restart a suspended offload queue
1839 * @qs: the queue set containing the offload queue
1840 *
1841 * Resumes transmission on a suspended Tx offload queue.
1842 */
1843 static void
1844 restart_offloadq(struct work *wk, void *data)
1845 {
1846
1847 struct mbuf *m;
1848 struct sge_qset *qs = data;
1849 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1850 adapter_t *adap = qs->port->adapter;
1851 struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
1852 bus_dma_segment_t segs[TX_MAX_SEGS];
1853 int nsegs, i, cleaned;
1854 struct tx_sw_desc *stx = &q->sdesc[q->pidx];
1855
1856 mtx_lock(&q->lock);
1857 again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
1858
1859 while ((m = mbufq_peek(&q->sendq)) != NULL) {
1860 unsigned int gen, pidx;
1861 unsigned int ndesc = m_get_priority(m);
1862
1863 if (__predict_false(q->size - q->in_use < ndesc)) {
1864 setbit(&qs->txq_stopped, TXQ_OFLD);
1865 smp_mb();
1866
1867 if (should_restart_tx(q) &&
1868 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1869 goto again;
1870 q->stops++;
1871 break;
1872 }
1873
1874 gen = q->gen;
1875 q->in_use += ndesc;
1876 pidx = q->pidx;
1877 q->pidx += ndesc;
1878 if (q->pidx >= q->size) {
1879 q->pidx -= q->size;
1880 q->gen ^= 1;
1881 }
1882
1883 (void)mbufq_dequeue(&q->sendq);
1884 busdma_map_mbufs(&m, q, stx, segs, &nsegs);
1885 mtx_unlock(&q->lock);
1886 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
1887 mtx_lock(&q->lock);
1888 }
1889 mtx_unlock(&q->lock);
1890
1891 #if USE_GTS
1892 set_bit(TXQ_RUNNING, &q->flags);
1893 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1894 #endif
1895 t3_write_reg(adap, A_SG_KDOORBELL,
1896 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1897
1898 for (i = 0; i < cleaned; i++) {
1899 m_freem_vec(m_vec[i]);
1900 }
1901 }
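
/*
 * Illustrative sketch, not part of the driver: the stop/restart handshake
 * used by restart_offloadq() above.  The queue is marked stopped *before*
 * re-checking for space, and smp_mb() orders the stop bit against the
 * re-read of the credit count, so a reclaim that frees descriptors
 * concurrently cannot leave the queue stopped forever.  The helper name
 * is an assumption for illustration only.
 */
#if 0
static int
example_stop_or_retry(struct sge_qset *qs, struct sge_txq *q, int qbit,
    unsigned int ndesc)
{
	if (q->size - q->in_use < ndesc) {
		setbit(&qs->txq_stopped, qbit);	/* publish "stopped" first */
		smp_mb();			/* order vs. reclaim's updates */
		if (should_restart_tx(q) &&	/* did space appear meanwhile? */
		    test_and_clear_bit(qbit, &qs->txq_stopped))
			return 1;		/* caller should retry now */
		return -1;			/* genuinely full; stay stopped */
	}
	return 0;				/* enough room, proceed */
}
#endif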
1902
1903 /**
1904 * queue_set - return the queue set a packet should use
1905 * @m: the packet
1906 *
1907 * Maps a packet to the SGE queue set it should use. The desired queue
1908 * set is carried in bits 1-3 in the packet's priority.
1909 */
1910 static __inline int
1911 queue_set(const struct mbuf *m)
1912 {
1913 return m_get_priority(m) >> 1;
1914 }
1915
1916 /**
1917 * is_ctrl_pkt - return whether an offload packet is a control packet
1918 * @m: the packet
1919 *
1920 * Determines whether an offload packet should use an OFLD or a CTRL
1921 * Tx queue. This is indicated by bit 0 in the packet's priority.
1922 */
1923 static __inline int
1924 is_ctrl_pkt(const struct mbuf *m)
1925 {
1926 return m_get_priority(m) & 1;
1927 }
1928
1929 /**
1930 * t3_offload_tx - send an offload packet
1931 * @tdev: the offload device to send to
1932 * @m: the packet
1933 *
1934 * Sends an offload packet. We use the packet priority to select the
1935 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1936 * should be sent as regular or control, bits 1-3 select the queue set.
1937 */
1938 int
1939 t3_offload_tx(struct toedev *tdev, struct mbuf *m)
1940 {
1941 adapter_t *adap = tdev2adap(tdev);
1942 struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
1943
1944 if (__predict_false(is_ctrl_pkt(m)))
1945 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
1946
1947 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
1948 }
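
/*
 * Illustrative sketch, not part of the driver: how a hypothetical caller
 * of t3_offload_tx() would encode the queue selection into the mbuf
 * priority that queue_set() and is_ctrl_pkt() above decode.  The helper
 * name is an assumption for illustration only.
 */
#if 0
static void
example_set_ofld_priority(struct mbuf *m, unsigned int qset_idx, int is_ctrl)
{
	/* bit 0: control vs. regular offload queue; bits 1-3: queue set */
	m_set_priority(m, (qset_idx << 1) | (is_ctrl ? 1 : 0));
}
#endif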
1949
1950 static void
1951 restart_tx(struct sge_qset *qs)
1952 {
1953 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
1954 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1955 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1956 qs->txq[TXQ_OFLD].restarts++;
1957 workqueue_enqueue(qs->txq[TXQ_OFLD].qresume_task.wq, &qs->txq[TXQ_OFLD].qresume_task.w, NULL);
1958 }
1959 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
1960 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1961 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1962 qs->txq[TXQ_CTRL].restarts++;
1963 workqueue_enqueue(qs->txq[TXQ_CTRL].qresume_task.wq, &qs->txq[TXQ_CTRL].qresume_task.w, NULL);
1964 }
1965 }
1966
1967 /**
1968 * t3_sge_alloc_qset - initialize an SGE queue set
1969 * @sc: the controller softc
1970 * @id: the queue set id
1971 * @nports: how many Ethernet ports will be using this queue set
1972 * @irq_vec_idx: the IRQ vector index for response queue interrupts
1973 * @p: configuration parameters for this queue set
1974 * @ntxq: number of Tx queues for the queue set
1975 * @pi: port info for queue set
1976 *
1977 * Allocate resources and initialize an SGE queue set. A queue set
1978 * comprises a response queue, two Rx free-buffer queues, and up to 3
1979 * Tx queues. The Tx queues are assigned roles in the order Ethernet
1980 * queue, offload queue, and control queue.
1981 */
1982 int
1983 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
1984 const struct qset_params *p, int ntxq, struct port_info *pi)
1985 {
1986 struct sge_qset *q = &sc->sge.qs[id];
1987 int i, ret = 0;
1988
1989 init_qset_cntxt(q, id);
1990
1991 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
1992 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
1993 &q->fl[0].desc, &q->fl[0].sdesc,
1994 &q->fl[0].desc_tag, &q->fl[0].desc_map,
1995 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
1996 goto err;
1997 }
1998
1999 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2000 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2001 &q->fl[1].desc, &q->fl[1].sdesc,
2002 &q->fl[1].desc_tag, &q->fl[1].desc_map,
2003 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2004 goto err;
2005 }
2006
2007 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2008 &q->rspq.phys_addr, &q->rspq.desc, NULL,
2009 &q->rspq.desc_tag, &q->rspq.desc_map,
2010 NULL, NULL)) != 0) {
2011 goto err;
2012 }
2013
2014 for (i = 0; i < ntxq; ++i) {
2015 /*
2016 * The control queue always uses immediate data so does not
2017 * need to keep track of any mbufs.
2018 * XXX Placeholder for future TOE support.
2019 */
2020 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2021
2022 if ((ret = alloc_ring(sc, p->txq_size[i],
2023 sizeof(struct tx_desc), sz,
2024 &q->txq[i].phys_addr, &q->txq[i].desc,
2025 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2026 &q->txq[i].desc_map,
2027 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2028 goto err;
2029 }
2030 mbufq_init(&q->txq[i].sendq);
2031 q->txq[i].gen = 1;
2032 q->txq[i].size = p->txq_size[i];
2033 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2034 0, irq_vec_idx, i);
2035 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2036 }
2037
2038 q->txq[TXQ_ETH].port = pi;
2039
2040 q->txq[TXQ_OFLD].qresume_task.name = "restart_offloadq";
2041 q->txq[TXQ_OFLD].qresume_task.func = restart_offloadq;
2042 q->txq[TXQ_OFLD].qresume_task.context = q;
2043 kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_OFLD].qresume_task, NULL, "cxgb_make_task");
2044
2045 q->txq[TXQ_CTRL].qresume_task.name = "restart_ctrlq";
2046 q->txq[TXQ_CTRL].qresume_task.func = restart_ctrlq;
2047 q->txq[TXQ_CTRL].qresume_task.context = q;
2048 kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_CTRL].qresume_task, NULL, "cxgb_make_task");
2049
2050 q->txq[TXQ_ETH].qreclaim_task.name = "sge_txq_reclaim_handler";
2051 q->txq[TXQ_ETH].qreclaim_task.func = sge_txq_reclaim_handler;
2052 q->txq[TXQ_ETH].qreclaim_task.context = &q->txq[TXQ_ETH];
2053 kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_ETH].qreclaim_task, NULL, "cxgb_make_task");
2054
2055 q->txq[TXQ_OFLD].qreclaim_task.name = "sge_txq_reclaim_handler";
2056 q->txq[TXQ_OFLD].qreclaim_task.func = sge_txq_reclaim_handler;
2057 q->txq[TXQ_OFLD].qreclaim_task.context = &q->txq[TXQ_OFLD];
2058 kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_OFLD].qreclaim_task, NULL, "cxgb_make_task");
2059
2060 q->fl[0].gen = q->fl[1].gen = 1;
2061 q->fl[0].size = p->fl_size;
2062 q->fl[1].size = p->jumbo_size;
2063
2064 q->rspq.gen = 1;
2065 q->rspq.cidx = 0;
2066 q->rspq.size = p->rspq_size;
2067
2068 q->txq[TXQ_ETH].stop_thres = nports *
2069 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2070
2071 q->fl[0].buf_size = MCLBYTES;
2072 q->fl[1].buf_size = MJUMPAGESIZE;
2073
2074 q->lro.enabled = lro_default;
2075
2076 mtx_lock(&sc->sge.reg_lock);
2077 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2078 q->rspq.phys_addr, q->rspq.size,
2079 q->fl[0].buf_size, 1, 0);
2080 if (ret) {
2081 printf("error %d from t3_sge_init_rspcntxt\n", ret);
2082 goto err_unlock;
2083 }
2084
2085 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2086 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2087 q->fl[i].phys_addr, q->fl[i].size,
2088 q->fl[i].buf_size, p->cong_thres, 1,
2089 0);
2090 if (ret) {
2091 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2092 goto err_unlock;
2093 }
2094 }
2095
2096 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2097 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2098 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2099 1, 0);
2100 if (ret) {
2101 printf("error %d from t3_sge_init_ecntxt\n", ret);
2102 goto err_unlock;
2103 }
2104
2105 if (ntxq > 1) {
2106 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2107 USE_GTS, SGE_CNTXT_OFLD, id,
2108 q->txq[TXQ_OFLD].phys_addr,
2109 q->txq[TXQ_OFLD].size, 0, 1, 0);
2110 if (ret) {
2111 printf("error %d from t3_sge_init_ecntxt\n", ret);
2112 goto err_unlock;
2113 }
2114 }
2115
2116 if (ntxq > 2) {
2117 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2118 SGE_CNTXT_CTRL, id,
2119 q->txq[TXQ_CTRL].phys_addr,
2120 q->txq[TXQ_CTRL].size,
2121 q->txq[TXQ_CTRL].token, 1, 0);
2122 if (ret) {
2123 printf("error %d from t3_sge_init_ecntxt\n", ret);
2124 goto err_unlock;
2125 }
2126 }
2127
2128 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2129 0, irq_vec_idx);
2130 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2131
2132 mtx_unlock(&sc->sge.reg_lock);
2133 t3_update_qset_coalesce(q, p);
2134 q->port = pi;
2135
2136 refill_fl(sc, &q->fl[0], q->fl[0].size);
2137 refill_fl(sc, &q->fl[1], q->fl[1].size);
2138 refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2139
2140 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2141 V_NEWTIMER(q->rspq.holdoff_tmr));
2142
2143 return (0);
2144
2145 err_unlock:
2146 mtx_unlock(&sc->sge.reg_lock);
2147 err:
2148 t3_free_qset(sc, q);
2149
2150 return (ret);
2151 }
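
/*
 * Illustrative sketch, not part of the driver: the shape of a typical
 * caller of t3_sge_alloc_qset(), allocating one queue set per port with
 * all three Tx queues.  The sc->params.sge.qset[] field, SGE_TXQ_PER_SET
 * and the helper name are assumptions for illustration; on failure
 * t3_sge_alloc_qset() has already freed the partially built queue set.
 */
#if 0
static int
example_alloc_all_qsets(adapter_t *sc, int nports)
{
	int i, err;

	for (i = 0; i < nports; i++) {
		/* queue set i serves port i and uses IRQ vector i */
		err = t3_sge_alloc_qset(sc, i, 1, i,
		    &sc->params.sge.qset[i], SGE_TXQ_PER_SET, &sc->port[i]);
		if (err)
			return err;
	}
	return 0;
}
#endif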
2152
2153 void
2154 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2155 {
2156 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2157 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2158 struct ifnet *ifp = pi->ifp;
2159
2160 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2161
2162 /*
2163 * XXX need to add VLAN support for 6.x
2164 */
2165 #ifdef VLAN_SUPPORTED
2166 if (__predict_false(cpl->vlan_valid)) {
2167 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2168 m->m_flags |= M_VLANTAG;
2169 }
2170 #endif
2171
2172 m_set_rcvif(m, ifp);
2173 m_explode(m);
2174 /*
2175 * strip the pad and CPL header after conversion to a plain mbuf chain
2176 */
2177 m_adj(m, sizeof(*cpl) + ethpad);
2178
2179 if_percpuq_enqueue(ifp->if_percpuq, m);
2180 }
2181
2182 /**
2183 * get_packet - return the next ingress packet buffer from a free list
2184 * @adap: the adapter that received the packet
2185 * @drop_thres: # of remaining buffers before we start dropping packets
2186 * @qs: the qset that the SGE free list holding the packet belongs to
2187 * @mh: the mbuf header, containing pointers to the head and tail of the mbuf chain
2188 * @r: response descriptor
2189 *
2190 * Get the next packet from a free list and complete setup of the
2191 * mbuf chain. If the packet is small we make a copy and recycle the
2192 * original buffer, otherwise we use the original buffer itself. If a
2193 * positive drop threshold is supplied packets are dropped and their
2194 * buffers recycled if (a) the number of remaining buffers is under the
2195 * threshold and the packet is too big to copy, or (b) the packet should
2196 * be copied but there is no memory for the copy.
2197 */
2198 #ifdef DISABLE_MBUF_IOVEC
2199
2200 static int
2201 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2202 struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m)
2203 {
2204
2205 unsigned int len_cq = ntohl(r->len_cq);
2206 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2207 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2208 uint32_t len = G_RSPD_LEN(len_cq);
2209 uint32_t flags = ntohl(r->flags);
2210 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2211 int ret = 0;
2212
2213 prefetch(sd->cl);
2214
2215 fl->credits--;
2216 bus_dmamap_sync(fl->entry_tag, sd->map, 0, len, BUS_DMASYNC_POSTREAD);
2217 bus_dmamap_unload(fl->entry_tag, sd->map);
2218
2219 m->m_len = len;
2220 m_cljset(m, sd->cl, fl->type);
2221
2222 switch(sopeop) {
2223 case RSPQ_SOP_EOP:
2224 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2225 mh->mh_head = mh->mh_tail = m;
2226 m->m_pkthdr.len = len;
2227 m->m_flags |= M_PKTHDR;
2228 ret = 1;
2229 break;
2230 case RSPQ_NSOP_NEOP:
2231 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2232 m->m_flags &= ~M_PKTHDR;
2233 if (mh->mh_tail == NULL) {
2234 if (cxgb_debug)
2235 printf("discarding intermediate descriptor entry\n");
2236 m_freem(m);
2237 break;
2238 }
2239 mh->mh_tail->m_next = m;
2240 mh->mh_tail = m;
2241 mh->mh_head->m_pkthdr.len += len;
2242 ret = 0;
2243 break;
2244 case RSPQ_SOP:
2245 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2246 m->m_pkthdr.len = len;
2247 mh->mh_head = mh->mh_tail = m;
2248 m->m_flags |= M_PKTHDR;
2249 ret = 0;
2250 break;
2251 case RSPQ_EOP:
2252 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2253 m->m_flags &= ~M_PKTHDR;
2254 mh->mh_head->m_pkthdr.len += len;
2255 mh->mh_tail->m_next = m;
2256 mh->mh_tail = m;
2257 ret = 1;
2258 break;
2259 }
2260 if (++fl->cidx == fl->size)
2261 fl->cidx = 0;
2262
2263 return (ret);
2264 }
2265
2266 #else
2267 static int
2268 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2269 struct mbuf *m, struct rsp_desc *r)
2270 {
2271
2272 unsigned int len_cq = ntohl(r->len_cq);
2273 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2274 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2275 uint32_t len = G_RSPD_LEN(len_cq);
2276 uint32_t flags = ntohl(r->flags);
2277 uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2278 void *cl;
2279 int ret = 0;
2280
2281 prefetch(sd->cl);
2282
2283 fl->credits--;
2284 bus_dmamap_sync(fl->entry_tag, sd->map, 0, len, BUS_DMASYNC_POSTREAD);
2285
2286 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2287 cl = mtod(m, void *);
2288 memcpy(cl, sd->cl, len);
2289 recycle_rx_buf(adap, fl, fl->cidx);
2290 } else {
2291 cl = sd->cl;
2292 bus_dmamap_unload(fl->entry_tag, sd->map);
2293 }
2294 switch(sopeop) {
2295 case RSPQ_SOP_EOP:
2296 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2297 m->m_len = m->m_pkthdr.len = len;
2298 if (cl == sd->cl)
2299 m_cljset(m, cl, fl->type);
2300 ret = 1;
2301 goto done;
2302 break;
2303 case RSPQ_NSOP_NEOP:
2304 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2305 ret = 0;
2306 break;
2307 case RSPQ_SOP:
2308 DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2309 m_iovinit(m);
2310 ret = 0;
2311 break;
2312 case RSPQ_EOP:
2313 DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2314 ret = 1;
2315 break;
2316 }
2317 m_iovappend(m, cl, fl->buf_size, len, 0);
2318
2319 done:
2320 if (++fl->cidx == fl->size)
2321 fl->cidx = 0;
2322
2323 return (ret);
2324 }
2325 #endif
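
/*
 * Illustrative sketch, not part of the driver: the SOP/EOP assembly that
 * both get_packet() variants above implement.  Each response descriptor
 * consumes one free-list buffer; SOP starts a packet, EOP completes one,
 * and a single-buffer packet is SOP|EOP.  The helper shows only the
 * head/tail bookkeeping and returns 1 when a complete packet is ready;
 * the real code also guards against a fragment arriving without a SOP.
 */
#if 0
static int
example_chain_fragment(struct t3_mbuf_hdr *mh, struct mbuf *m, uint32_t len,
    uint8_t sopeop)
{
	switch (sopeop) {
	case RSPQ_SOP_EOP:		/* whole packet in one buffer */
		mh->mh_head = mh->mh_tail = m;
		m->m_pkthdr.len = len;
		return 1;
	case RSPQ_SOP:			/* first buffer of a multi-buffer packet */
		mh->mh_head = mh->mh_tail = m;
		m->m_pkthdr.len = len;
		return 0;
	case RSPQ_NSOP_NEOP:		/* middle buffer: append and keep going */
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		mh->mh_head->m_pkthdr.len += len;
		return 0;
	case RSPQ_EOP:			/* last buffer: append and complete */
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		mh->mh_head->m_pkthdr.len += len;
		return 1;
	}
	return 0;
}
#endif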
2326 /**
2327 * handle_rsp_cntrl_info - handles control information in a response
2328 * @qs: the queue set corresponding to the response
2329 * @flags: the response control flags
2330 *
2331 * Handles the control information of an SGE response, such as GTS
2332 * indications and completion credits for the queue set's Tx queues.
2333 * HW coalesces credits; we don't do any extra SW coalescing.
2334 */
2335 static __inline void
2336 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2337 {
2338 unsigned int credits;
2339
2340 #if USE_GTS
2341 if (flags & F_RSPD_TXQ0_GTS)
2342 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2343 #endif
2344 credits = G_RSPD_TXQ0_CR(flags);
2345 if (credits) {
2346 qs->txq[TXQ_ETH].processed += credits;
2347 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
2348 workqueue_enqueue(qs->port->timer_reclaim_task.wq,
2349 &qs->port->timer_reclaim_task.w, NULL);
2350 }
2351
2352 credits = G_RSPD_TXQ2_CR(flags);
2353 if (credits)
2354 qs->txq[TXQ_CTRL].processed += credits;
2355
2356 # if USE_GTS
2357 if (flags & F_RSPD_TXQ1_GTS)
2358 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2359 # endif
2360 credits = G_RSPD_TXQ1_CR(flags);
2361 if (credits)
2362 qs->txq[TXQ_OFLD].processed += credits;
2363 }
2364
2365 static void
2366 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2367 unsigned int sleeping)
2368 {
2369 ;	/* XXX stub: nothing to do while GTS-based Tx suspension is unused */
2370 }
2371
2372 /**
2373 * process_responses - process responses from an SGE response queue
2374 * @adap: the adapter
2375 * @qs: the queue set to which the response queue belongs
2376 * @budget: how many responses can be processed in this round
2377 *
2378 * Process responses from an SGE response queue up to the supplied budget.
2379 * Responses include received packets as well as credits and other events
2380 * for the queues that belong to the response queue's queue set.
2381 * A negative budget is effectively unlimited.
2382 *
2383 * Additionally choose the interrupt holdoff time for the next interrupt
2384 * on this queue. If the system is short of memory, use a fairly
2385 * long delay to help recovery.
2386 */
2387 static int
2388 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2389 {
2390 struct sge_rspq *rspq = &qs->rspq;
2391 struct rsp_desc *r = &rspq->desc[rspq->cidx];
2392 int budget_left = budget;
2393 unsigned int sleeping = 0;
2394 int lro = qs->lro.enabled;
2395 #ifdef DEBUG
2396 static int last_holdoff = 0;
2397 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2398 printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2399 last_holdoff = rspq->holdoff_tmr;
2400 }
2401 #endif
2402 rspq->next_holdoff = rspq->holdoff_tmr;
2403
2404 while (__predict_true(budget_left && is_new_response(r, rspq))) {
2405 int eth, eop = 0, ethpad = 0;
2406 uint32_t flags = ntohl(r->flags);
2407 uint32_t rss_csum = *(const uint32_t *)r;
2408 uint32_t rss_hash = r->rss_hdr.rss_hash_val;
2409
2410 eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2411
2412 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2413 /* XXX */
2414 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2415 #ifdef DISABLE_MBUF_IOVEC
2416 if (cxgb_debug)
2417 printf("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", r->rss_hdr.opcode, rspq->cidx);
2418
2419 if(get_imm_packet(adap, r, &rspq->rspq_mh) == 0) {
2420 rspq->next_holdoff = NOMEM_INTR_DELAY;
2421 budget_left--;
2422 break;
2423 } else {
2424 eop = 1;
2425 }
2426 #else
2427 struct mbuf *m = NULL;
2428
2429 if (rspq->rspq_mbuf == NULL)
2430 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
2431 else
2432 m = m_gethdr(M_DONTWAIT, MT_DATA);
2433
2434 /*
2435 * XXX revisit me
2436 */
2437 if (rspq->rspq_mbuf == NULL && m == NULL) {
2438 rspq->next_holdoff = NOMEM_INTR_DELAY;
2439 budget_left--;
2440 break;
2441 }
2442 if (get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags))
2443 goto skip;
2444 eop = 1;
2445 #endif
2446 rspq->imm_data++;
2447 } else if (r->len_cq) {
2448 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2449
2450 #ifdef DISABLE_MBUF_IOVEC
2451 struct mbuf *m;
2452 m = m_gethdr(M_NOWAIT, MT_DATA);
2453
2454 if (m == NULL) {
2455 log(LOG_WARNING, "failed to get mbuf for packet\n");
2456 break;
2457 }
2458
2459 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m);
2460 #else
2461 if (rspq->rspq_mbuf == NULL)
2462 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
2463 if (rspq->rspq_mbuf == NULL) {
2464 log(LOG_WARNING, "failed to get mbuf for packet\n");
2465 break;
2466 }
2467 eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r);
2468 #endif
2469 ethpad = 2;
2470 } else {
2471 DPRINTF("pure response\n");
2472 rspq->pure_rsps++;
2473 }
2474
2475 if (flags & RSPD_CTRL_MASK) {
2476 sleeping |= flags & RSPD_GTS_MASK;
2477 handle_rsp_cntrl_info(qs, flags);
2478 }
2479 #ifndef DISABLE_MBUF_IOVEC
2480 skip:
2481 #endif
2482 r++;
2483 if (__predict_false(++rspq->cidx == rspq->size)) {
2484 rspq->cidx = 0;
2485 rspq->gen ^= 1;
2486 r = rspq->desc;
2487 }
2488
2489 prefetch(r);
2490 if (++rspq->credits >= (rspq->size / 4)) {
2491 refill_rspq(adap, rspq, rspq->credits);
2492 rspq->credits = 0;
2493 }
2494
2495 if (eop) {
2496 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
2497 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);
2498
2499 if (eth) {
2500 t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,
2501 rss_hash, rss_csum, lro);
2502
2503 rspq->rspq_mh.mh_head = NULL;
2504 } else {
2505 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2506 /*
2507 * XXX size mismatch
2508 */
2509 m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2510 }
2511 __refill_fl(adap, &qs->fl[0]);
2512 __refill_fl(adap, &qs->fl[1]);
2513
2514 }
2515 --budget_left;
2516 }
2517
2518 t3_lro_flush(adap, qs, &qs->lro);
2519
2520 if (sleeping)
2521 check_ring_db(adap, qs, sleeping);
2522
2523 smp_mb(); /* commit Tx queue processed updates */
2524 if (__predict_false(qs->txq_stopped != 0))
2525 restart_tx(qs);
2526
2527 budget -= budget_left;
2528 return (budget);
2529 }
2530
2531 /*
2532 * A helper function that processes responses and issues GTS.
2533 */
2534 static __inline int
2535 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2536 {
2537 int work;
2538 static int last_holdoff = 0;
2539
2540 work = process_responses(adap, rspq_to_qset(rq), -1);
2541
2542 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2543 printf("next_holdoff=%d\n", rq->next_holdoff);
2544 last_holdoff = rq->next_holdoff;
2545 }
2546 if (work)
2547 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2548 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2549 return work;
2550 }
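
/*
 * Illustrative sketch, not part of the driver: how a polled consumer
 * could use the budget argument of process_responses(), in contrast to
 * process_responses_gts() above, which passes -1 (unlimited) from the
 * interrupt path.  The function name is an assumption for illustration.
 */
#if 0
static int
example_poll_rspq(adapter_t *adap, struct sge_qset *qs, int budget)
{
	struct sge_rspq *rq = &qs->rspq;
	/* returns how many responses were consumed, at most 'budget' */
	int done = process_responses(adap, qs, budget);

	if (done < budget) {
		/* queue drained: update GTS to re-arm the interrupt */
		t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
		    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
	}
	return done;
}
#endif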
2551
2552
2553 /*
2554 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2555 * Handles data events from SGE response queues as well as error and other
2556 * async events as they all use the same interrupt pin. We use one SGE
2557 * response queue per port in this mode and protect all response queues with
2558 * queue 0's lock.
2559 */
2560 int
2561 t3b_intr(void *data)
2562 {
2563 uint32_t i, map;
2564 adapter_t *adap = data;
2565 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2566
2567 t3_write_reg(adap, A_PL_CLI, 0);
2568 map = t3_read_reg(adap, A_SG_DATA_INTR);
2569
2570 if (!map)
2571 return (FALSE);
2572
2573 if (__predict_false(map & F_ERRINTR))
2574 workqueue_enqueue(adap->slow_intr_task.wq, &adap->slow_intr_task.w, NULL);
2575
2576 mtx_lock(&q0->lock);
2577 for_each_port(adap, i)
2578 if (map & (1 << i))
2579 process_responses_gts(adap, &adap->sge.qs[i].rspq);
2580 mtx_unlock(&q0->lock);
2581
2582 return (TRUE);
2583 }
2584
2585 /*
2586 * The MSI interrupt handler. This needs to handle data events from SGE
2587 * response queues as well as error and other async events as they all use
2588 * the same MSI vector. We use one SGE response queue per port in this mode
2589 * and protect all response queues with queue 0's lock.
2590 */
2591 int
2592 t3_intr_msi(void *data)
2593 {
2594 adapter_t *adap = data;
2595 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2596 int i, new_packets = 0;
2597
2598 mtx_lock(&q0->lock);
2599
2600 for_each_port(adap, i)
2601 if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
2602 new_packets = 1;
2603 mtx_unlock(&q0->lock);
2604 if (new_packets == 0)
2605 workqueue_enqueue(adap->slow_intr_task.wq, &adap->slow_intr_task.w, NULL);
2606
2607 return (TRUE);
2608 }
2609
2610 int
2611 t3_intr_msix(void *data)
2612 {
2613 struct sge_qset *qs = data;
2614 adapter_t *adap = qs->port->adapter;
2615 struct sge_rspq *rspq = &qs->rspq;
2616
2617 mtx_lock(&rspq->lock);
2618 if (process_responses_gts(adap, rspq) == 0)
2619 rspq->unhandled_irqs++;
2620 mtx_unlock(&rspq->lock);
2621
2622 return (TRUE);
2623 }
2624
2625 /**
2626 * t3_get_desc - dump an SGE descriptor for debugging purposes
2627 * @qs: the queue set
2628 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
2629 * @idx: the descriptor index in the queue
2630 * @data: where to dump the descriptor contents
2631 *
2632 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
2633 * size of the descriptor.
2634 */
2635 int
2636 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2637 unsigned char *data)
2638 {
2639 if (qnum >= 6)
2640 return (EINVAL);
2641
2642 if (qnum < 3) {
2643 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
2644 return (EINVAL);
2645 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2646 return sizeof(struct tx_desc);
2647 }
2648
2649 if (qnum == 3) {
2650 if (!qs->rspq.desc || idx >= qs->rspq.size)
2651 return (EINVAL);
2652 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2653 return sizeof(struct rsp_desc);
2654 }
2655
2656 qnum -= 4;
2657 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2658 return (EINVAL);
2659 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2660 return sizeof(struct rx_desc);
2661 }
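
/*
 * Illustrative sketch, not part of the driver: dumping the first
 * descriptor of every queue in a queue set through t3_get_desc().  The
 * union is sized for the largest of the three descriptor formats; the
 * helper name is an assumption for illustration only.
 */
#if 0
static void
example_dump_first_descs(const struct sge_qset *qs)
{
	union {
		struct tx_desc txd;
		struct rsp_desc rspd;
		struct rx_desc rxd;
	} buf;
	unsigned int qnum;
	int len;

	for (qnum = 0; qnum < 6; qnum++) {
		/* 0..2: Tx queues, 3: response queue, 4..5: free lists */
		len = t3_get_desc(qs, qnum, 0, (unsigned char *)&buf);
		/* on success len is the descriptor size; otherwise EINVAL */
		printf("queue %u: t3_get_desc returned %d\n", qnum, len);
	}
}
#endif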
2662