1 /**************************************************************************
2 
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: cxgb_sge.c,v 1.4 2016/06/10 13:27:14 ozaki-r Exp $");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/conf.h>
37 #include <sys/bus.h>
38 #include <sys/queue.h>
39 #include <sys/sysctl.h>
40 
41 #include <sys/proc.h>
42 #include <sys/sched.h>
43 #include <sys/systm.h>
44 
45 #include <netinet/in_systm.h>
46 #include <netinet/in.h>
47 #include <netinet/ip.h>
48 #include <netinet/tcp.h>
49 
50 #include <dev/pci/pcireg.h>
51 #include <dev/pci/pcivar.h>
52 
53 #ifdef CONFIG_DEFINED
54 #include <cxgb_include.h>
55 #else
56 #include <dev/pci/cxgb/cxgb_include.h>
57 #endif
58 
59 uint32_t collapse_free = 0;
60 uint32_t mb_free_vec_free = 0;
61 int      txq_fills = 0;
62 int      collapse_mbufs = 0;
63 static int bogus_imm = 0;
64 #ifndef DISABLE_MBUF_IOVEC
65 static int recycle_enable = 1;
66 #endif
67 
68 #define USE_GTS 0
69 
70 #define SGE_RX_SM_BUF_SIZE  1536
71 #define SGE_RX_DROP_THRES   16
72 #define SGE_RX_COPY_THRES   128
73 
74 /*
75  * Period of the Tx buffer reclaim timer.  This timer does not need to run
76  * frequently as Tx buffers are usually reclaimed by new Tx packets.
77  */
78 #define TX_RECLAIM_PERIOD       (hz >> 1)
79 
80 /*
81  * work request size in bytes
82  */
83 #define WR_LEN (WR_FLITS * 8)
84 
85 /*
86  * Values for sge_txq.flags
87  */
88 enum {
89     TXQ_RUNNING = 1 << 0,  /* fetch engine is running */
90     TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
91 };
92 
93 struct tx_desc {
94     uint64_t    flit[TX_DESC_FLITS];
95 } __packed;
96 
97 struct rx_desc {
98     uint32_t    addr_lo;
99     uint32_t    len_gen;
100     uint32_t    gen2;
101     uint32_t    addr_hi;
102 } __packed;
103 
104 struct rsp_desc {               /* response queue descriptor */
105     struct rss_header   rss_hdr;
106     uint32_t        flags;
107     uint32_t        len_cq;
108     uint8_t         imm_data[47];
109     uint8_t         intr_gen;
110 } __packed;
111 
112 #define RX_SW_DESC_MAP_CREATED  (1 << 0)
113 #define TX_SW_DESC_MAP_CREATED  (1 << 1)
114 #define RX_SW_DESC_INUSE        (1 << 3)
115 #define TX_SW_DESC_MAPPED       (1 << 4)
116 
117 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
118 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
119 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
120 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
121 
122 struct tx_sw_desc {                /* SW state per Tx descriptor */
123     struct mbuf *m;
124     bus_dma_segment_t segs[1];
125     bus_dmamap_t    map;
126     int     flags;
127 };
128 
129 struct rx_sw_desc {                /* SW state per Rx descriptor */
130     void            *cl;
131     bus_dmamap_t    map;
132     int     flags;
133 };
134 
135 struct txq_state {
136     unsigned int compl;
137     unsigned int gen;
138     unsigned int pidx;
139 };
140 
141 /*
142  * Maps a number of flits to the number of Tx descriptors that can hold them.
143  * The formula is
144  *
145  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
146  *
147  * HW allows up to 4 descriptors to be combined into a WR.
148  */
149 static uint8_t flit_desc_map[] = {
150     0,
151 #if SGE_NUM_GENBITS == 1
152     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
153     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
154     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
155     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
156 #elif SGE_NUM_GENBITS == 2
157     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
159     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
160     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
161 #else
162 # error "SGE_NUM_GENBITS must be 1 or 2"
163 #endif
164 };
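/*
 * Illustrative self-check, not part of the driver: for flits >= 2 the table
 * above should agree with the formula quoted in the comment,
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).  A hypothetical sanity check
 * (never compiled) could walk the table and compare:
 */
#if 0
static void
check_flit_desc_map(void)
{
    unsigned int flits;

    for (flits = 2; flits < sizeof(flit_desc_map); flits++)
        KASSERT(flit_desc_map[flits] ==
            1 + (flits - 2) / (WR_FLITS - 1));
}
#endif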
165 
166 
167 static int lro_default = 0;
168 int cxgb_debug = 0;
169 
170 static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
171 static void sge_timer_cb(void *arg);
172 static void sge_timer_reclaim(struct work *wk, void *arg);
173 static void sge_txq_reclaim_handler(struct work *wk, void *arg);
174 static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec);
175 
176 /**
177  *  reclaim_completed_tx - reclaims completed Tx descriptors
178  *  @q: the Tx queue to reclaim completed descriptors from
179  *  @nbufs: the maximum number of completed buffers to collect
180  *  @mvec: array that receives the mbufs of the reclaimed descriptors
181  *
182  *  Reclaims Tx descriptors that the SGE has indicated it has processed and
183  *  hands their mbufs to the caller via @mvec.  Called with the Tx queue's lock held.
184  */
185 static __inline int
186 reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec)
187 {
188     int reclaimed, reclaim = desc_reclaimable(q);
189     int n = 0;
190 
191     mtx_assert(&q->lock, MA_OWNED);
192     if (reclaim > 0) {
193         n = free_tx_desc(q, min(reclaim, nbufs), mvec);
194         reclaimed = min(reclaim, nbufs);
195         q->cleaned += reclaimed;
196         q->in_use -= reclaimed;
197     }
198     return (n);
199 }
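/*
 * Usage sketch (hypothetical helper; mirrors sge_txq_reclaim_() below): the
 * queue lock is held across the reclaim itself, and the collected mbufs are
 * freed only after the lock has been dropped.
 */
#if 0
static void
example_reclaim(struct sge_txq *txq)
{
    struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
    int i, n;

    mtx_lock(&txq->lock);
    n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec);
    mtx_unlock(&txq->lock);

    for (i = 0; i < n; i++)
        m_freem_vec(m_vec[i]);  /* free outside the queue lock */
}
#endif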
200 
201 /**
202  *  should_restart_tx - are there enough resources to restart a Tx queue?
203  *  @q: the Tx queue
204  *
205  *  Checks if there are enough descriptors to restart a suspended Tx queue.
206  */
207 static __inline int
208 should_restart_tx(const struct sge_txq *q)
209 {
210     unsigned int r = q->processed - q->cleaned;
211 
212     return q->in_use - r < (q->size >> 1);
213 }
214 
215 /**
216  *  t3_sge_init - initialize SGE
217  *  @adap: the adapter
218  *  @p: the SGE parameters
219  *
220  *  Performs SGE initialization needed every time after a chip reset.
221  *  We do not initialize any of the queue sets here, instead the driver
222  *  top-level must request those individually.  We also do not enable DMA
223  *  here, that should be done after the queues have been set up.
224  */
225 void
226 t3_sge_init(adapter_t *adap, struct sge_params *p)
227 {
228     u_int ctrl, ups;
229 
230     ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
231 
232     ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
233            F_CQCRDTCTRL |
234            V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
235            V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
236 #if SGE_NUM_GENBITS == 1
237     ctrl |= F_EGRGENCTRL;
238 #endif
239     if (adap->params.rev > 0) {
240         if (!(adap->flags & (USING_MSIX | USING_MSI)))
241             ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
242         ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
243     }
244     t3_write_reg(adap, A_SG_CONTROL, ctrl);
245     t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
246              V_LORCQDRBTHRSH(512));
247     t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
248     t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
249              V_TIMEOUT(200 * core_ticks_per_usec(adap)));
250     t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
251     t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
252     t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
253     t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
254     t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
255     t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
256 }
257 
258 
259 /**
260  *  sgl_len - calculates the size of an SGL of the given capacity
261  *  @n: the number of SGL entries
262  *
263  *  Calculates the number of flits needed for a scatter/gather list that
264  *  can hold the given number of entries.
265  */
266 static __inline unsigned int
267 sgl_len(unsigned int n)
268 {
269     return ((3 * n) / 2 + (n & 1));
270 }
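/*
 * Worked example: each struct sg_ent packs two address/length pairs into
 * three flits, so n entries take about 3n/2 flits rounded up, e.g.
 * sgl_len(1) == 2, sgl_len(3) == 5 and sgl_len(4) == 6.
 */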
271 
272 /**
273  *  get_imm_packet - return the next ingress packet buffer from a response
274  *  @resp: the response descriptor containing the packet data
275  *
276  *  Return a packet containing the immediate data of the given response.
277  */
278 #ifdef DISABLE_MBUF_IOVEC
279 static __inline int
280 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh)
281 {
282     struct mbuf *m;
283     int len;
284     uint32_t flags = ntohl(resp->flags);
285     uint8_t sopeop = G_RSPD_SOP_EOP(flags);
286 
287     /*
288      * would be a firmware bug
289      */
290     if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
291         return (0);
292 
293     m = m_gethdr(M_NOWAIT, MT_DATA);
294     len = G_RSPD_LEN(ntohl(resp->len_cq));
295 
296     if (m) {
297         MH_ALIGN(m, IMMED_PKT_SIZE);
298         memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE);
299         m->m_len = len;
300 
301         switch (sopeop) {
302         case RSPQ_SOP_EOP:
303             mh->mh_head = mh->mh_tail = m;
304             m->m_pkthdr.len = len;
305             m->m_flags |= M_PKTHDR;
306             break;
307         case RSPQ_EOP:
308             m->m_flags &= ~M_PKTHDR;
309             mh->mh_head->m_pkthdr.len += len;
310             mh->mh_tail->m_next = m;
311             mh->mh_tail = m;
312             break;
313         }
314     }
315     return (m != NULL);
316 }
317 
318 #else
319 static int
320 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags)
321 {
322     int len, error;
323     uint8_t sopeop = G_RSPD_SOP_EOP(flags);
324 
325     /*
326      * would be a firmware bug
327      */
328     len = G_RSPD_LEN(ntohl(resp->len_cq));
329     if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) {
330         if (cxgb_debug)
331             device_printf(sc->dev, "unexpected value sopeop=%d flags=0x%x len=%d in get_imm_packet\n", sopeop, flags, len);
332         bogus_imm++;
333         return (EINVAL);
334     }
335     error = 0;
336     switch (sopeop) {
337     case RSPQ_SOP_EOP:
338         m->m_len = m->m_pkthdr.len = len;
339         memcpy(mtod(m, uint8_t *), resp->imm_data, len);
340         break;
341     case RSPQ_EOP:
342         memcpy(cl, resp->imm_data, len);
343         m_iovappend(m, cl, MSIZE, len, 0);
344         break;
345     default:
346         bogus_imm++;
347         error = EINVAL;
348     }
349 
350     return (error);
351 }
352 #endif
353 
354 static __inline u_int
355 flits_to_desc(u_int n)
356 {
357     return (flit_desc_map[n]);
358 }
359 
360 void
361 t3_sge_err_intr_handler(adapter_t *adapter)
362 {
363     unsigned int v, status;
364 
365 
366     status = t3_read_reg(adapter, A_SG_INT_CAUSE);
367 
368     if (status & F_RSPQCREDITOVERFOW)
369         CH_ALERT(adapter, "SGE response queue credit overflow\n");
370 
371     if (status & F_RSPQDISABLED) {
372         v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
373 
374         CH_ALERT(adapter,
375              "packet delivered to disabled response queue (0x%x)\n",
376              (v >> S_RSPQ0DISABLED) & 0xff);
377     }
378 
379     t3_write_reg(adapter, A_SG_INT_CAUSE, status);
380     if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
381         t3_fatal_err(adapter);
382 }
383 
384 void
385 t3_sge_prep(adapter_t *adap, struct sge_params *p)
386 {
387     int i;
388 
389     /* XXX Does ETHER_ALIGN need to be accounted for here? */
390     p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);
391 
392     for (i = 0; i < SGE_QSETS; ++i) {
393         struct qset_params *q = p->qset + i;
394 
395         q->polling = adap->params.rev > 0;
396 
397         if (adap->params.nports > 2)
398             q->coalesce_nsecs = 50000;
399         else
400             q->coalesce_nsecs = 5000;
401 
402         q->rspq_size = RSPQ_Q_SIZE;
403         q->fl_size = FL_Q_SIZE;
404         q->jumbo_size = JUMBO_Q_SIZE;
405         q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
406         q->txq_size[TXQ_OFLD] = 1024;
407         q->txq_size[TXQ_CTRL] = 256;
408         q->cong_thres = 0;
409     }
410 }
411 
412 int
413 t3_sge_alloc(adapter_t *sc)
414 {
415     /* The parent tag. */
416     sc->parent_dmat = sc->pa.pa_dmat;
417 
418     /*
419      * DMA tag for normal sized RX frames
420      */
421     sc->rx_dmat = sc->pa.pa_dmat;
422 
423     /*
424      * DMA tag for jumbo sized RX frames.
425      */
426     sc->rx_jumbo_dmat = sc->pa.pa_dmat;
427 
428     /*
429      * DMA tag for TX frames.
430      */
431     sc->tx_dmat = sc->pa.pa_dmat;
432 
433     return (0);
434 }
435 
436 int
437 t3_sge_free(struct adapter * sc)
438 {
439     return (0);
440 }
441 
442 void
443 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
444 {
445 
446     qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
447     qs->rspq.polling = 0 /* p->polling */;
448 }
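/*
 * Illustrative arithmetic (assuming the holdoff timer counts SG timer
 * ticks): t3_sge_init() programs A_SG_TIMER_TICK to
 * core_ticks_per_usec(adap) / 10, i.e. a tick of roughly 100ns, so the
 * 5000ns default from t3_sge_prep() maps to holdoff_tmr == 50 and the
 * 50000ns value to 500.
 */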
449 
450 /**
451  *  refill_fl - refill an SGE free-buffer list
452  *  @sc: the controller softc
453  *  @q: the free-list to refill
454  *  @n: the number of new buffers to allocate
455  *
456  *  (Re)populate an SGE free-buffer list with up to @n new packet buffers.
457  *  The caller must assure that @n does not exceed the queue's capacity.
458  */
459 static void
460 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
461 {
462     struct rx_sw_desc *sd = &q->sdesc[q->pidx];
463     struct rx_desc *d = &q->desc[q->pidx];
464     void *cl;
465     int err;
466 
467     while (n--) {
468         /*
469          * We only allocate a cluster; mbuf allocation happens after rx
470          */
471         if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0)
472         {
473             err = bus_dmamap_create(sc->pa.pa_dmat,
474                         q->buf_size, 1, q->buf_size, 0,
475                         BUS_DMA_ALLOCNOW, &sd->map);
476             if (err != 0)
477             {
478                 log(LOG_WARNING, "failure in refill_fl\n");
479                 return;
480             }
481             sd->flags |= RX_SW_DESC_MAP_CREATED;
482         }
483         cl = malloc(q->buf_size, M_DEVBUF, M_NOWAIT);
484         if (cl == NULL)
485         {
486             log(LOG_WARNING, "Failed to allocate cluster\n");
487             break;
488         }
489         err = bus_dmamap_load(sc->pa.pa_dmat, sd->map, cl, q->buf_size, NULL, BUS_DMA_NOWAIT);
490         if (err)
491         {
492             log(LOG_WARNING, "failure in refill_fl\n");
493             free(cl, M_DEVBUF);
494             return;
495         }
496 
497         sd->flags |= RX_SW_DESC_INUSE;
498         sd->cl = cl;
499         d->addr_lo = htobe32(sd->map->dm_segs[0].ds_addr & 0xffffffff);
500         d->addr_hi = htobe32(((uint64_t)sd->map->dm_segs[0].ds_addr>>32) & 0xffffffff);
501         d->len_gen = htobe32(V_FLD_GEN1(q->gen));
502         d->gen2 = htobe32(V_FLD_GEN2(q->gen));
503 
504         d++;
505         sd++;
506 
507         if (++q->pidx == q->size) {
508             q->pidx = 0;
509             q->gen ^= 1;
510             sd = q->sdesc;
511             d = q->desc;
512         }
513         q->credits++;
514     }
515 
516     t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
517 }
518 
519 
520 /**
521  *  free_rx_bufs - free the Rx buffers on an SGE free list
522  *  @sc: the controller softc
523  *  @q: the SGE free list to clean up
524  *
525  *  Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
526  *  this queue should be stopped before calling this function.
527  */
528 static void
529 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
530 {
531     u_int cidx = q->cidx;
532 
533     while (q->credits--) {
534         struct rx_sw_desc *d = &q->sdesc[cidx];
535 
536         if (d->flags & RX_SW_DESC_INUSE) {
537 	    bus_dmamap_unload(q->entry_tag, d->map);
538 	    bus_dmamap_destroy(q->entry_tag, d->map);
539 	    d->map = NULL;
540             free(d->cl, M_DEVBUF);
541 	    d->cl = NULL;
542         }
543         d->cl = NULL;
544         if (++cidx == q->size)
545             cidx = 0;
546     }
547 }
548 
549 static __inline void
550 __refill_fl(adapter_t *adap, struct sge_fl *fl)
551 {
552     refill_fl(adap, fl, min(16U, fl->size - fl->credits));
553 }
554 
555 #ifndef DISABLE_MBUF_IOVEC
556 /**
557  *  recycle_rx_buf - recycle a receive buffer
558  *  @adapter: the adapter
559  *  @q: the SGE free list
560  *  @idx: index of buffer to recycle
561  *
562  *  Recycles the specified buffer on the given free list by adding it at
563  *  the next available slot on the list.
564  */
565 static void
566 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
567 {
568     struct rx_desc *from = &q->desc[idx];
569     struct rx_desc *to   = &q->desc[q->pidx];
570 
571     q->sdesc[q->pidx] = q->sdesc[idx];
572     to->addr_lo = from->addr_lo;        // already big endian
573     to->addr_hi = from->addr_hi;        // likewise
574     wmb();
575     to->len_gen = htobe32(V_FLD_GEN1(q->gen));
576     to->gen2 = htobe32(V_FLD_GEN2(q->gen));
577     q->credits++;
578 
579     if (++q->pidx == q->size) {
580         q->pidx = 0;
581         q->gen ^= 1;
582     }
583     t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
584 }
585 #endif
586 
587 static int
588 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
589     bus_addr_t *phys,
590     void *desc, void *sdesc, bus_dma_tag_t *tag,
591     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
592 {
593     size_t len = nelem * elem_size;
594     void *s = NULL;
595     void *p = NULL;
596     int err;
597     bus_dma_segment_t phys_seg;
598 
599     int nsegs;
600 
601     *tag = sc->pa.pa_dmat;
602 
603     /* allocate wired physical memory for DMA descriptor array */
604     err = bus_dmamem_alloc(*tag, len, PAGE_SIZE, 0, &phys_seg, 1,
605                 &nsegs, BUS_DMA_NOWAIT);
606     if (err != 0)
607     {
608         device_printf(sc->dev, "Cannot allocate descriptor memory\n");
609         return (ENOMEM);
610     }
611     *phys = phys_seg.ds_addr;
612 
613     /* map physical address to kernel virtual address */
614     err = bus_dmamem_map(*tag, &phys_seg, 1, len, &p,
615                 BUS_DMA_NOWAIT|BUS_DMA_COHERENT);
616     if (err != 0)
617     {
618         device_printf(sc->dev, "Cannot map descriptor memory\n");
619         return (ENOMEM);
620     }
621 
622     memset(p, 0, len);
623     *(void **)desc = p;
624 
625     if (sw_size)
626     {
627         len = nelem * sw_size;
628         s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
629         *(void **)sdesc = s;
630     }
631 
632     if (parent_entry_tag == NULL)
633         return (0);
634     *entry_tag = sc->pa.pa_dmat;
635 
636     return (0);
637 }
638 
639 static void
640 sge_slow_intr_handler(struct work *wk, void *arg)
641 {
642     adapter_t *sc = arg;
643 
644     t3_slow_intr_handler(sc);
645 }
646 
647 /**
648  *  sge_timer_cb - perform periodic maintenance of an SGE qset
649  *  @data: the SGE queue set to maintain
650  *
651  *  Runs periodically from a timer to perform maintenance of an SGE queue
652  *  set.  It performs the following tasks:
653  *
654  *  a) Cleans up any completed Tx descriptors that may still be pending.
655  *  Normal descriptor cleanup happens when new packets are added to a Tx
656  *  queue so this timer is relatively infrequent and does any cleanup only
657  *  if the Tx queue has not seen any new packets in a while.  We make a
658  *  best effort attempt to reclaim descriptors, in that we don't wait
659  *  around if we cannot get a queue's lock (which most likely is because
660  *  someone else is queueing new packets and so will also handle the clean
661  *  up).  Since control queues use immediate data exclusively we don't
662  *  bother cleaning them up here.
663  *
664  *  b) Replenishes Rx queues that have run out due to memory shortage.
665  *  Normally new Rx buffers are added when existing ones are consumed but
666  *  when out of memory a queue can become empty.  We try to add only a few
667  *  buffers here, the queue will be replenished fully as these new buffers
668  *  are used up if memory shortage has subsided.
669  *
670  *  c) Return coalesced response queue credits in case a response queue is
671  *  starved.
672  *
673  *  d) Ring doorbells for T304 tunnel queues since we have seen doorbell
674  *  fifo overflows and the FW doesn't implement any recovery scheme yet.
675  */
676 
677 static void
678 sge_timer_cb(void *arg)
679 {
680     adapter_t *sc = arg;
681     struct port_info *p;
682     struct sge_qset *qs;
683     struct sge_txq  *txq;
684     int i, j;
685     int reclaim_eth, reclaim_ofl, refill_rx;
686 
687     for (i = 0; i < sc->params.nports; i++)
688         for (j = 0; j < sc->port[i].nqsets; j++) {
689             qs = &sc->sge.qs[i + j];
690             txq = &qs->txq[0];
691             reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
692             reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
693             refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
694                 (qs->fl[1].credits < qs->fl[1].size));
695             if (reclaim_eth || reclaim_ofl || refill_rx) {
696                 p = &sc->port[i];
697                 workqueue_enqueue(p->timer_reclaim_task.wq, &p->timer_reclaim_task.w, NULL);
698                 break;
699             }
700         }
701     if (sc->params.nports > 2) {
702         int k;
703 
704         for_each_port(sc, k) {
705             struct port_info *pi = &sc->port[k];
706 
707             t3_write_reg(sc, A_SG_KDOORBELL,
708                      F_SELEGRCNTX |
709                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
710         }
711     }
712     if (sc->open_device_map != 0)
713         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
714 }
715 
716 /*
717  * This is meant to be a catch-all function to keep sge state private
718  * to sge.c
719  *
720  */
721 int
722 t3_sge_init_adapter(adapter_t *sc)
723 {
724     callout_init(&sc->sge_timer_ch, 0);
725     callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
726     sc->slow_intr_task.name = "sge_slow_intr";
727     sc->slow_intr_task.func = sge_slow_intr_handler;
728     sc->slow_intr_task.context = sc;
729     kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &sc->slow_intr_task, NULL, "cxgb_make_task");
730     return (0);
731 }
732 
733 int
734 t3_sge_init_port(struct port_info *p)
735 {
736     p->timer_reclaim_task.name = "sge_timer_reclaim";
737     p->timer_reclaim_task.func = sge_timer_reclaim;
738     p->timer_reclaim_task.context = p;
739     kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &p->timer_reclaim_task, NULL, "cxgb_make_task");
740 
741     return (0);
742 }
743 
744 void
745 t3_sge_deinit_sw(adapter_t *sc)
746 {
747     callout_drain(&sc->sge_timer_ch);
748 }
749 
750 /**
751  *  refill_rspq - replenish an SGE response queue
752  *  @adapter: the adapter
753  *  @q: the response queue to replenish
754  *  @credits: how many new responses to make available
755  *
756  *  Replenishes a response queue by making the supplied number of responses
757  *  available to HW.
758  */
759 static __inline void
760 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
761 {
762 
763     /* mbufs are allocated on demand when a rspq entry is processed. */
764     t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
765              V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
766 }
767 
768 static __inline void
769 sge_txq_reclaim_(struct sge_txq *txq)
770 {
771     int reclaimable, i, n;
772     struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
773     struct port_info *p;
774 
775     p = txq->port;
776 reclaim_more:
777     n = 0;
778     reclaimable = desc_reclaimable(txq);
779     if (reclaimable > 0 && mtx_trylock(&txq->lock)) {
780         n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec);
781         mtx_unlock(&txq->lock);
782     }
783     if (n == 0)
784         return;
785 
786     for (i = 0; i < n; i++) {
787         m_freem_vec(m_vec[i]);
788     }
789     if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
790         txq->size - txq->in_use >= TX_START_MAX_DESC) {
791         txq_fills++;
792         p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
793         workqueue_enqueue(p->start_task.wq, &p->start_task.w, NULL);
794     }
795 
796     if (n)
797         goto reclaim_more;
798 }
799 
800 static void
801 sge_txq_reclaim_handler(struct work *wk, void *arg)
802 {
803     struct sge_txq *q = arg;
804 
805     sge_txq_reclaim_(q);
806 }
807 
808 static void
809 sge_timer_reclaim(struct work *wk, void *arg)
810 {
811     struct port_info *p = arg;
812     int i, nqsets = p->nqsets;
813     adapter_t *sc = p->adapter;
814     struct sge_qset *qs;
815     struct sge_txq *txq;
816     struct mtx *lock;
817 
818     for (i = 0; i < nqsets; i++) {
819         qs = &sc->sge.qs[i];
820         txq = &qs->txq[TXQ_ETH];
821         sge_txq_reclaim_(txq);
822 
823         txq = &qs->txq[TXQ_OFLD];
824         sge_txq_reclaim_(txq);
825 
826         lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
827                 &sc->sge.qs[0].rspq.lock;
828 
829         if (mtx_trylock(lock)) {
830             /* XXX currently assume that we are *NOT* polling */
831             uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
832 
833             if (qs->fl[0].credits < qs->fl[0].size - 16)
834                 __refill_fl(sc, &qs->fl[0]);
835             if (qs->fl[1].credits < qs->fl[1].size - 16)
836                 __refill_fl(sc, &qs->fl[1]);
837 
838             if (status & (1 << qs->rspq.cntxt_id)) {
839                 if (qs->rspq.credits) {
840                     refill_rspq(sc, &qs->rspq, 1);
841                     qs->rspq.credits--;
842                     t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
843                         1 << qs->rspq.cntxt_id);
844                 }
845             }
846             mtx_unlock(lock);
847         }
848     }
849 }
850 
851 /**
852  *  init_qset_cntxt - initialize an SGE queue set context info
853  *  @qs: the queue set
854  *  @id: the queue set id
855  *
856  *  Initializes the TIDs and context ids for the queues of a queue set.
857  */
858 static void
859 init_qset_cntxt(struct sge_qset *qs, u_int id)
860 {
861 
862     qs->rspq.cntxt_id = id;
863     qs->fl[0].cntxt_id = 2 * id;
864     qs->fl[1].cntxt_id = 2 * id + 1;
865     qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
866     qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
867     qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
868     qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
869     qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
870 }
871 
872 
873 static void
874 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
875 {
876     txq->in_use += ndesc;
877     /*
878      * XXX we don't handle stopping of the queue;
879      * presumably start handles this when we bump against the end
880      */
881     txqs->gen = txq->gen;
882     txq->unacked += ndesc;
883     txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
884     txq->unacked &= 7;
885     txqs->pidx = txq->pidx;
886     txq->pidx += ndesc;
887 
888     if (txq->pidx >= txq->size) {
889         txq->pidx -= txq->size;
890         txq->gen ^= 1;
891     }
892 
893 }
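/*
 * Note on the compl field: a completion is requested from the SGE roughly
 * once every eight descriptors.  txq->unacked accumulates ndesc, and when
 * the running count crosses 8 the expression (unacked & 8) << (S_WR_COMPL - 3)
 * equals F_WR_COMPL; the counter is then folded back into the range 0..7.
 */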
894 
895 /**
896  *  calc_tx_descs - calculate the number of Tx descriptors for a packet
897  *  @m: the packet mbufs
898  *  @nsegs: the number of segments
899  *
900  *  Returns the number of Tx descriptors needed for the given Ethernet
901  *  packet.  Ethernet packets require addition of WR and CPL headers.
902  */
903 static __inline unsigned int
904 calc_tx_descs(const struct mbuf *m, int nsegs)
905 {
906     unsigned int flits;
907 
908     if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
909         return 1;
910 
911     flits = sgl_len(nsegs) + 2;
912 #ifdef TSO_SUPPORTED
913     if  (m->m_pkthdr.csum_flags & (CSUM_TSO))
914         flits++;
915 #endif
916     return flits_to_desc(flits);
917 }
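/*
 * Worked example: a packet too large for the immediate-data path that maps
 * to 4 DMA segments needs sgl_len(4) + 2 == 8 flits, which flits_to_desc()
 * turns into a single Tx descriptor; 20 segments need 32 flits and hence 3
 * descriptors (TSO, where supported, adds one more flit first).
 */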
918 
919 static unsigned int
920 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
921     struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
922 {
923     struct mbuf *m0;
924     int err, pktlen;
925     int i, total_len;
926 
927     m0 = *m;
928     pktlen = m0->m_pkthdr.len;
929 
930     m0 = *m;
931     i = 0;
932     total_len = 0;
933     while (m0)
934     {
935         i++;
936         total_len += m0->m_len;
937         m0 = m0->m_next;
938     }
939     err = bus_dmamap_create(txq->entry_tag, total_len, TX_MAX_SEGS, total_len, 0, BUS_DMA_NOWAIT, &stx->map);
940     if (err)
941         return (err);
942     err = bus_dmamap_load_mbuf(txq->entry_tag, stx->map, *m, 0);
943     if (err)
944         return (err);
945     // feed out the physical mappings
946     *nsegs = stx->map->dm_nsegs;
947     for (i=0; i<*nsegs; i++)
948     {
949         segs[i] = stx->map->dm_segs[i];
950     }
951 #ifdef DEBUG
952     if (err) {
953         int n = 0;
954         struct mbuf *mtmp = m0;
955         while(mtmp) {
956             n++;
957             mtmp = mtmp->m_next;
958         }
959         printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
960             err, m0->m_pkthdr.len, n);
961     }
962 #endif
963     if (err == EFBIG) {
964         /* Too many segments, try to defrag */
965         m0 = m_defrag(m0, M_DONTWAIT);
966         if (m0 == NULL) {
967             m_freem(*m);
968             *m = NULL;
969             return (ENOBUFS);
970         }
971         *m = m0;
972         INT3; // XXXXXXXXXXXXXXXXXX like above!
973     }
974 
975     if (err == ENOMEM) {
976         return (err);
977     }
978 
979     if (err) {
980         if (cxgb_debug)
981             printf("map failure err=%d pktlen=%d\n", err, pktlen);
982         m_freem_vec(m0);
983         *m = NULL;
984         return (err);
985     }
986 
987     bus_dmamap_sync(txq->entry_tag, stx->map, 0, pktlen, BUS_DMASYNC_PREWRITE);
988     stx->flags |= TX_SW_DESC_MAPPED;
989 
990     return (0);
991 }
992 
993 /**
994  *  make_sgl - populate a scatter/gather list for a packet
995  *  @sgp: the SGL to populate
996  *  @segs: the packet dma segments
997  *  @nsegs: the number of segments
998  *
999  *  Generates a scatter/gather list for the buffers that make up a packet
1000  *  and returns the SGL size in 8-byte words.  The caller must size the SGL
1001  *  appropriately.
1002  */
1003 static __inline void
1004 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1005 {
1006     int i, idx;
1007 
1008     for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
1009         if (i && idx == 0)
1010             ++sgp;
1011 
1012         sgp->len[idx] = htobe32(segs[i].ds_len);
1013         sgp->addr[idx] = htobe64(segs[i].ds_addr);
1014     }
1015 
1016     if (idx)
1017         sgp->len[idx] = 0;
1018 }
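/*
 * Illustrative note: segments are packed two per struct sg_ent (len[0]/addr[0]
 * and len[1]/addr[1]).  With nsegs == 3, for example, the first sg_ent holds
 * segments 0 and 1 and the second holds segment 2, whose unused len[1] is
 * zeroed by the trailing "if (idx)" store.
 */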
1019 
1020 /**
1021  *  check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1022  *  @adap: the adapter
1023  *  @q: the Tx queue
1024  *
1025  *  Ring the doorbell if a Tx queue is asleep.  There is a natural race
1026  *  where the HW may go to sleep just after we checked; in that case the
1027  *  interrupt handler will detect the outstanding TX packet and ring the
1028  *  doorbell for us.
1029  *
1030  *  When GTS is disabled we unconditionally ring the doorbell.
1031  */
1032 static __inline void
1033 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1034 {
1035 #if USE_GTS
1036     clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1037     if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1038         set_bit(TXQ_LAST_PKT_DB, &q->flags);
1039 #ifdef T3_TRACE
1040         T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1041               q->cntxt_id);
1042 #endif
1043         t3_write_reg(adap, A_SG_KDOORBELL,
1044                  F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1045     }
1046 #else
1047     wmb();            /* write descriptors before telling HW */
1048     t3_write_reg(adap, A_SG_KDOORBELL,
1049              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1050 #endif
1051 }
1052 
1053 static __inline void
1054 wr_gen2(struct tx_desc *d, unsigned int gen)
1055 {
1056 #if SGE_NUM_GENBITS == 2
1057     d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1058 #endif
1059 }
1060 
1061 
1062 
1063 /**
1064  *  write_wr_hdr_sgl - write a WR header and, optionally, SGL
1065  *  @ndesc: number of Tx descriptors spanned by the SGL
1066  *  @txd: first Tx descriptor to be written
1067  *  @txqs: txq state (generation and producer index)
1068  *  @txq: the SGE Tx queue
1069  *  @sgl: the SGL
1070  *  @flits: number of flits to the start of the SGL in the first descriptor
1071  *  @sgl_flits: the SGL size in flits
1072  *  @wr_hi: top 32 bits of WR header based on WR type (big endian)
1073  *  @wr_lo: low 32 bits of WR header based on WR type (big endian)
1074  *
1075  *  Write a work request header and an associated SGL.  If the SGL is
1076  *  small enough to fit into one Tx descriptor it has already been written
1077  *  and we just need to write the WR header.  Otherwise we distribute the
1078  *  SGL across the number of descriptors it spans.
1079  */
1080 
1081 static void
1082 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1083     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1084     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1085 {
1086 
1087     struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1088     struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1089 
1090     if (__predict_true(ndesc == 1)) {
1091         wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1092             V_WR_SGLSFLT(flits)) | wr_hi;
1093         wmb();
1094         wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1095             V_WR_GEN(txqs->gen)) | wr_lo;
1096         /* XXX gen? */
1097         wr_gen2(txd, txqs->gen);
1098     } else {
1099         unsigned int ogen = txqs->gen;
1100         const uint64_t *fp = (const uint64_t *)sgl;
1101         struct work_request_hdr *wp = wrp;
1102 
1103         wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1104             V_WR_SGLSFLT(flits)) | wr_hi;
1105 
1106         while (sgl_flits) {
1107             unsigned int avail = WR_FLITS - flits;
1108 
1109             if (avail > sgl_flits)
1110                 avail = sgl_flits;
1111             memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1112             sgl_flits -= avail;
1113             ndesc--;
1114             if (!sgl_flits)
1115                 break;
1116 
1117             fp += avail;
1118             txd++;
1119             txsd++;
1120             if (++txqs->pidx == txq->size) {
1121                 txqs->pidx = 0;
1122                 txqs->gen ^= 1;
1123                 txd = txq->desc;
1124                 txsd = txq->sdesc;
1125             }
1126 
1127             /*
1128              * when the head of the mbuf chain
1129              * is freed all clusters will be freed
1130              * with it
1131              */
1132             txsd->m = NULL;
1133             wrp = (struct work_request_hdr *)txd;
1134             wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1135                 V_WR_SGLSFLT(1)) | wr_hi;
1136             wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1137                     sgl_flits + 1)) |
1138                 V_WR_GEN(txqs->gen)) | wr_lo;
1139             wr_gen2(txd, txqs->gen);
1140             flits = 1;
1141         }
1142         wrp->wr_hi |= htonl(F_WR_EOP);
1143         wmb();
1144         wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1145         wr_gen2((struct tx_desc *)wp, ogen);
1146     }
1147 }
1148 
1149 
1150 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1151 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
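/*
 * For reference: ETHER_HDR_LEN (14) + ETHER_VLAN_ENCAP_LEN (4) + a minimal
 * IPv4 header (20) + a minimal TCP header (20) = 58 bytes.  This is how much
 * header data the LSO path in t3_encap() copies out of the mbuf chain when
 * the first mbuf is shorter than that; IP and TCP options are not included.
 */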
1152 
1153 int
1154 t3_encap(struct port_info *p, struct mbuf **m, int *free_it)
1155 {
1156     adapter_t *sc;
1157     struct mbuf *m0;
1158     struct sge_qset *qs;
1159     struct sge_txq *txq;
1160     struct tx_sw_desc *stx;
1161     struct txq_state txqs;
1162     unsigned int ndesc, flits, cntrl, mlen;
1163     int err, nsegs, tso_info = 0;
1164 
1165     struct work_request_hdr *wrp;
1166     struct tx_sw_desc *txsd;
1167     struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1168     bus_dma_segment_t segs[TX_MAX_SEGS];
1169     uint32_t wr_hi, wr_lo, sgl_flits;
1170 
1171     struct tx_desc *txd;
1172     struct cpl_tx_pkt *cpl;
1173 
1174     m0 = *m;
1175     sc = p->adapter;
1176 
1177     DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset);
1178 
1179     /* port_id=1 qsid=1 txpkt_intf=2 tx_chan=0 */
1180 
1181     qs = &sc->sge.qs[p->first_qset];
1182 
1183     txq = &qs->txq[TXQ_ETH];
1184     stx = &txq->sdesc[txq->pidx];
1185     txd = &txq->desc[txq->pidx];
1186     cpl = (struct cpl_tx_pkt *)txd;
1187     mlen = m0->m_pkthdr.len;
1188     cpl->len = htonl(mlen | 0x80000000);
1189 
1190     DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", mlen, p->txpkt_intf, p->tx_chan);
1191     /*
1192      * XXX handle checksum, TSO, and VLAN here
1193      *
1194      */
1195     cntrl = V_TXPKT_INTF(p->txpkt_intf);
1196 
1197     /*
1198      * XXX need to add VLAN support for 6.x
1199      */
1200 #ifdef VLAN_SUPPORTED
1201     if (m0->m_flags & M_VLANTAG)
1202         cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
1203     if  (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1204         tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1205 #endif
1206     if (tso_info) {
1207         int eth_type;
1208         struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
1209         struct ip *ip;
1210         struct tcphdr *tcp;
1211         char *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */
1212 
1213         txd->flit[2] = 0;
1214         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1215         hdr->cntrl = htonl(cntrl);
1216 
1217         if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1218             pkthdr = &tmp[0];
1219             m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
1220         } else {
1221             pkthdr = mtod(m0, char *);
1222         }
1223 
1224 #ifdef VLAN_SUPPORTED
1225         if (__predict_false(m0->m_flags & M_VLANTAG)) {
1226             eth_type = CPL_ETH_II_VLAN;
1227             ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1228                 ETHER_VLAN_ENCAP_LEN);
1229         } else {
1230             eth_type = CPL_ETH_II;
1231             ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1232         }
1233 #else
1234         eth_type = CPL_ETH_II;
1235         ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1236 #endif
1237         tcp = (struct tcphdr *)((uint8_t *)ip +
1238             sizeof(*ip));
1239 
1240         tso_info |= V_LSO_ETH_TYPE(eth_type) |
1241                 V_LSO_IPHDR_WORDS(ip->ip_hl) |
1242                 V_LSO_TCPHDR_WORDS(tcp->th_off);
1243         hdr->lso_info = htonl(tso_info);
1244         flits = 3;
1245     } else {
1246         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1247         cpl->cntrl = htonl(cntrl);
1248 
1249         if (mlen <= WR_LEN - sizeof(*cpl)) {
1250             txq_prod(txq, 1, &txqs);
1251             txq->sdesc[txqs.pidx].m = NULL;
1252 
1253             if (m0->m_len == m0->m_pkthdr.len)
1254                 memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
1255             else
1256                 m_copydata(m0, 0, mlen, (void *)&txd->flit[2]);
1257 
1258             *free_it = 1;
1259             flits = (mlen + 7) / 8 + 2;
1260             cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1261                       V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1262                       F_WR_SOP | F_WR_EOP | txqs.compl);
1263             wmb();
1264             cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1265                 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1266 
1267             wr_gen2(txd, txqs.gen);
1268             check_ring_tx_db(sc, txq);
1269             return (0);
1270         }
1271         flits = 2;
1272     }
1273 
1274     wrp = (struct work_request_hdr *)txd;
1275 
1276     if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
1277         return (err);
1278     }
1279     m0 = *m;
1280     ndesc = calc_tx_descs(m0, nsegs);
1281 
1282     sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1283     make_sgl(sgp, segs, nsegs);
1284 
1285     sgl_flits = sgl_len(nsegs);
1286 
1287     DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1288     txq_prod(txq, ndesc, &txqs);
1289     txsd = &txq->sdesc[txqs.pidx];
1290     wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1291     wr_lo = htonl(V_WR_TID(txq->token));
1292     txsd->m = m0;
1293     m_set_priority(m0, txqs.pidx);
1294 
1295     write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1296     check_ring_tx_db(p->adapter, txq);
1297 
1298     return (0);
1299 }
1300 
1301 
1302 /**
1303  *  write_imm - write a packet into a Tx descriptor as immediate data
1304  *  @d: the Tx descriptor to write
1305  *  @m: the packet
1306  *  @len: the length of packet data to write as immediate data
1307  *  @gen: the generation bit value to write
1308  *
1309  *  Writes a packet as immediate data into a Tx descriptor.  The packet
1310  *  contains a work request at its beginning.  We must write the packet
1311  *  carefully so the SGE doesn't read accidentally before it's written in
1312  *  its entirety.
1313  */
1314 static __inline void
1315 write_imm(struct tx_desc *d, struct mbuf *m,
1316       unsigned int len, unsigned int gen)
1317 {
1318     struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1319     struct work_request_hdr *to = (struct work_request_hdr *)d;
1320 
1321     memcpy(&to[1], &from[1], len - sizeof(*from));
1322     to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1323                     V_WR_BCNTLFLT(len & 7));
1324     wmb();
1325     to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1326                     V_WR_LEN((len + 7) / 8));
1327     wr_gen2(d, gen);
1328     m_freem(m);
1329 }
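/*
 * Note on ordering: the WR body and wr_hi are written first; the wmb()
 * keeps those stores from being reordered past the write of wr_lo, whose
 * generation and length fields are what make the descriptor look valid to
 * the SGE (wr_gen2() plays the same role for the separate generation flit
 * when SGE_NUM_GENBITS == 2).
 */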
1330 
1331 /**
1332  *  check_desc_avail - check descriptor availability on a send queue
1333  *  @adap: the adapter
1334  *  @q: the TX queue
1335  *  @m: the packet needing the descriptors
1336  *  @ndesc: the number of Tx descriptors needed
1337  *  @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1338  *
1339  *  Checks if the requested number of Tx descriptors is available on an
1340  *  SGE send queue.  If the queue is already suspended or not enough
1341  *  descriptors are available the packet is queued for later transmission.
1342  *  Must be called with the Tx queue locked.
1343  *
1344  *  Returns 0 if enough descriptors are available, 1 if there aren't
1345  *  enough descriptors and the packet has been queued, and 2 if the caller
1346  *  needs to retry because there weren't enough descriptors at the
1347  *  beginning of the call but some freed up in the meantime.
1348  */
1349 static __inline int
1350 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1351          struct mbuf *m, unsigned int ndesc,
1352          unsigned int qid)
1353 {
1354     /*
1355      * XXX We currently only use this for checking the control queue;
1356      * the control queue is only used for binding qsets, which happens
1357      * at init time, so we are guaranteed enough descriptors.
1358      */
1359     if (__predict_false(!mbufq_empty(&q->sendq))) {
1360 addq_exit:  mbufq_tail(&q->sendq, m);
1361         return 1;
1362     }
1363     if (__predict_false(q->size - q->in_use < ndesc)) {
1364 
1365         struct sge_qset *qs = txq_to_qset(q, qid);
1366 
1367         setbit(&qs->txq_stopped, qid);
1368         smp_mb();
1369 
1370         if (should_restart_tx(q) &&
1371             test_and_clear_bit(qid, &qs->txq_stopped))
1372             return 2;
1373 
1374         q->stops++;
1375         goto addq_exit;
1376     }
1377     return 0;
1378 }
1379 
1380 
1381 /**
1382  *  reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1383  *  @q: the SGE control Tx queue
1384  *
1385  *  This is a variant of reclaim_completed_tx() that is used for Tx queues
1386  *  that send only immediate data (presently just the control queues) and
1387  *  thus do not have any mbufs.
1388  */
1389 static __inline void
1390 reclaim_completed_tx_imm(struct sge_txq *q)
1391 {
1392     unsigned int reclaim = q->processed - q->cleaned;
1393 
1394     mtx_assert(&q->lock, MA_OWNED);
1395 
1396     q->in_use -= reclaim;
1397     q->cleaned += reclaim;
1398 }
1399 
1400 static __inline int
1401 immediate(const struct mbuf *m)
1402 {
1403     return m->m_len <= WR_LEN  && m->m_pkthdr.len <= WR_LEN ;
1404 }
1405 
1406 /**
1407  *  ctrl_xmit - send a packet through an SGE control Tx queue
1408  *  @adap: the adapter
1409  *  @q: the control queue
1410  *  @m: the packet
1411  *
1412  *  Send a packet through an SGE control Tx queue.  Packets sent through
1413  *  a control queue must fit entirely as immediate data in a single Tx
1414  *  descriptor and have no page fragments.
1415  */
1416 static int
1417 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1418 {
1419     int ret;
1420     struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1421 
1422     if (__predict_false(!immediate(m))) {
1423         m_freem(m);
1424         return 0;
1425     }
1426 
1427     wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1428     wrp->wr_lo = htonl(V_WR_TID(q->token));
1429 
1430     mtx_lock(&q->lock);
1431 again:  reclaim_completed_tx_imm(q);
1432 
1433     ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1434     if (__predict_false(ret)) {
1435         if (ret == 1) {
1436             mtx_unlock(&q->lock);
1437             return (-1);
1438         }
1439         goto again;
1440     }
1441 
1442     write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1443 
1444     q->in_use++;
1445     if (++q->pidx >= q->size) {
1446         q->pidx = 0;
1447         q->gen ^= 1;
1448     }
1449     mtx_unlock(&q->lock);
1450     wmb();
1451     t3_write_reg(adap, A_SG_KDOORBELL,
1452              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1453     return (0);
1454 }
1455 
1456 
1457 /**
1458  *  restart_ctrlq - restart a suspended control queue
1459  *  @qs: the queue set containing the control queue
1460  *
1461  *  Resumes transmission on a suspended Tx control queue.
1462  */
1463 static void
1464 restart_ctrlq(struct work *wk, void *data)
1465 {
1466     struct mbuf *m;
1467     struct sge_qset *qs = (struct sge_qset *)data;
1468     struct sge_txq *q = &qs->txq[TXQ_CTRL];
1469     adapter_t *adap = qs->port->adapter;
1470 
1471     mtx_lock(&q->lock);
1472 again:  reclaim_completed_tx_imm(q);
1473 
1474     while (q->in_use < q->size &&
1475            (m = mbufq_dequeue(&q->sendq)) != NULL) {
1476 
1477         write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1478 
1479         if (++q->pidx >= q->size) {
1480             q->pidx = 0;
1481             q->gen ^= 1;
1482         }
1483         q->in_use++;
1484     }
1485     if (!mbufq_empty(&q->sendq)) {
1486         setbit(&qs->txq_stopped, TXQ_CTRL);
1487         smp_mb();
1488 
1489         if (should_restart_tx(q) &&
1490             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1491             goto again;
1492         q->stops++;
1493     }
1494     mtx_unlock(&q->lock);
1495     t3_write_reg(adap, A_SG_KDOORBELL,
1496              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1497 }
1498 
1499 
1500 /*
1501  * Send a management message through control queue 0
1502  */
1503 int
1504 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1505 {
1506     return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1507 }
1508 
1509 /**
1510  *  free_qset - free the resources of an SGE queue set
1511  *  @sc: the controller owning the queue set
1512  *  @q: the queue set
1513  *
1514  *  Release the HW and SW resources associated with an SGE queue set, such
1515  *  as HW contexts, packet buffers, and descriptor rings.  Traffic to the
1516  *  queue set must be quiesced prior to calling this.
1517  */
1518 static void
1519 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1520 {
1521     int i;
1522 
1523     for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1524         if (q->fl[i].desc) {
1525             mtx_lock(&sc->sge.reg_lock);
1526             t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1527             mtx_unlock(&sc->sge.reg_lock);
1528             bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1529 		INT3;
1530 //            bus_dmamem_free(q->fl[i].desc_tag, &q->fl[i].phys_addr, 1);
1531             // XXXXXXXXXXX destroy DMA tags????
1532         }
1533         if (q->fl[i].sdesc) {
1534             free_rx_bufs(sc, &q->fl[i]);
1535             free(q->fl[i].sdesc, M_DEVBUF);
1536         }
1537     }
1538 
1539     for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1540         if (q->txq[i].desc) {
1541             mtx_lock(&sc->sge.reg_lock);
1542             t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1543             mtx_unlock(&sc->sge.reg_lock);
1544             bus_dmamap_unload(q->txq[i].desc_tag,
1545                     q->txq[i].desc_map);
1546 		INT3;
1547 //            bus_dmamem_free(q->txq[i].desc_tag, &q->txq[i].phys_addr, 1);
1548             // XXXXXXXXXXX destroy DMA tags????  And the lock?!??!
1549 
1550         }
1551         if (q->txq[i].sdesc) {
1552             free(q->txq[i].sdesc, M_DEVBUF);
1553         }
1554     }
1555 
1556     if (q->rspq.desc) {
1557         mtx_lock(&sc->sge.reg_lock);
1558         t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1559         mtx_unlock(&sc->sge.reg_lock);
1560 
1561         bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1562 	INT3;
1563 //        bus_dmamem_free(q->rspq.desc_tag, &q->rspq.phys_addr, 1);
1564         // XXXXXXXXXXX destroy DMA tags???? and the LOCK ?!?!?
1565     }
1566 
1567     memset(q, 0, sizeof(*q));
1568 }
1569 
1570 /**
1571  *  t3_free_sge_resources - free SGE resources
1572  *  @sc: the adapter softc
1573  *
1574  *  Frees resources used by the SGE queue sets.
1575  */
1576 void
1577 t3_free_sge_resources(adapter_t *sc)
1578 {
1579     int i, nqsets;
1580 
1581     for (nqsets = i = 0; i < (sc)->params.nports; i++)
1582         nqsets += sc->port[i].nqsets;
1583 
1584     for (i = 0; i < nqsets; ++i)
1585         t3_free_qset(sc, &sc->sge.qs[i]);
1586 }
1587 
1588 /**
1589  *  t3_sge_start - enable SGE
1590  *  @sc: the controller softc
1591  *
1592  *  Enables the SGE for DMAs.  This is the last step in starting packet
1593  *  transfers.
1594  */
1595 void
1596 t3_sge_start(adapter_t *sc)
1597 {
1598     t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1599 }
1600 
1601 /**
1602  *  t3_sge_stop - disable SGE operation
1603  *  @sc: the adapter
1604  *
1605  *  Disables the DMA engine.  This can be called in emergencies (e.g.,
1606  *  from error interrupts) or from normal process context.  In the latter
1607  *  case it also disables any pending queue restart tasklets.  Note that
1608  *  if it is called in interrupt context it cannot disable the restart
1609  *  tasklets as it cannot wait, however the tasklets will have no effect
1610  *  since the doorbells are disabled and the driver will call this again
1611  *  later from process context, at which time the tasklets will be stopped
1612  *  if they are still running.
1613  */
1614 void
1615 t3_sge_stop(adapter_t *sc)
1616 {
1617     int i, nqsets;
1618 
1619     t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1620 
1621     for (nqsets = i = 0; i < (sc)->params.nports; i++)
1622         nqsets += sc->port[i].nqsets;
1623 
1624     for (i = 0; i < nqsets; ++i) {
1625     }
1626 }
1627 
1628 
1629 /**
1630  *  free_tx_desc - reclaims Tx descriptors and their buffers
1631  *  @adapter: the adapter
1632  *  @q: the Tx queue to reclaim descriptors from
1633  *  @n: the number of descriptors to reclaim
1634  *
1635  *  Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1636  *  Tx buffers.  Called with the Tx queue lock held.
1637  */
1638 int
1639 free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec)
1640 {
1641     struct tx_sw_desc *d;
1642     unsigned int cidx = q->cidx;
1643     int nbufs = 0;
1644 
1645 #ifdef T3_TRACE
1646     T3_TRACE2(sc->tb[q->cntxt_id & 7],
1647           "reclaiming %u Tx descriptors at cidx %u", n, cidx);
1648 #endif
1649     d = &q->sdesc[cidx];
1650 
1651     while (n-- > 0) {
1652         DPRINTF("cidx=%d d=%p\n", cidx, d);
1653         if (d->m) {
1654             if (d->flags & TX_SW_DESC_MAPPED) {
1655                 bus_dmamap_unload(q->entry_tag, d->map);
1656                 bus_dmamap_destroy(q->entry_tag, d->map);
1657                 d->flags &= ~TX_SW_DESC_MAPPED;
1658             }
1659             if (m_get_priority(d->m) == cidx) {
1660                 m_vec[nbufs] = d->m;
1661                 d->m = NULL;
1662                 nbufs++;
1663             } else {
1664                 printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
1665             }
1666         }
1667         ++d;
1668         if (++cidx == q->size) {
1669             cidx = 0;
1670             d = q->sdesc;
1671         }
1672     }
1673     q->cidx = cidx;
1674 
1675     return (nbufs);
1676 }
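/*
 * Illustrative sketch (not compiled): a typical caller reclaims completed
 * descriptors under the queue lock and frees the returned mbufs only after
 * dropping it, as ofld_xmit() and restart_offloadq() below do.
 * TX_CLEAN_MAX_DESC bounds the batch size.
 *
 *    struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
 *    int i, cleaned;
 *
 *    mtx_lock(&q->lock);
 *    cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
 *    mtx_unlock(&q->lock);
 *    for (i = 0; i < cleaned; i++)
 *        m_freem_vec(m_vec[i]);
 */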
1677 
1678 /**
1679  *  is_new_response - check if a response is newly written
1680  *  @r: the response descriptor
1681  *  @q: the response queue
1682  *
1683  *  Returns true if a response descriptor contains a yet unprocessed
1684  *  response.
1685  */
1686 static __inline int
1687 is_new_response(const struct rsp_desc *r,
1688     const struct sge_rspq *q)
1689 {
1690     return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1691 }
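/*
 * Note on the generation check: the hardware tags each response it writes
 * with the current generation (the F_RSPD_GEN2 field of intr_gen).  The
 * software generation starts at 1 (see t3_sge_alloc_qset()) and is flipped
 * each time the consumer index wraps (see process_responses()), so a
 * descriptor whose generation matches the queue's is one the hardware has
 * written since the last wrap, i.e. a new response.
 */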
1692 
1693 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1694 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1695             V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1696             V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1697             V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1698 
1699 /* How long to delay the next interrupt in case of memory shortage, in units of 0.1us (2500 = 250us). */
1700 #define NOMEM_INTR_DELAY 2500
1701 
1702 /**
1703  *  write_ofld_wr - write an offload work request
1704  *  @adap: the adapter
1705  *  @m: the packet to send
1706  *  @q: the Tx queue
1707  *  @pidx: index of the first Tx descriptor to write
1708  *  @gen: the generation value to use
1709  *  @ndesc: number of descriptors the packet will occupy
1710  *
1711  *  Write an offload work request to send the supplied packet.  The packet
1712  *  data already carry the work request with most fields populated.
1713  */
1714 static void
1715 write_ofld_wr(adapter_t *adap, struct mbuf *m,
1716     struct sge_txq *q, unsigned int pidx,
1717     unsigned int gen, unsigned int ndesc,
1718     bus_dma_segment_t *segs, unsigned int nsegs)
1719 {
1720     unsigned int sgl_flits, flits;
1721     struct work_request_hdr *from;
1722     struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1723     struct tx_desc *d = &q->desc[pidx];
1724     struct txq_state txqs;
1725 
1726     if (immediate(m)) {
1727         q->sdesc[pidx].m = NULL;
1728         write_imm(d, m, m->m_len, gen);
1729         return;
1730     }
1731 
1732     /* Only TX_DATA builds SGLs */
1733 
1734     from = mtod(m, struct work_request_hdr *);
1735     INT3;       /* XXX: offload (TOE) work requests are not implemented in this port; trap here. */
1736     flits = 3;  /* XXX: placeholder flit count until the real WR header size is computed. */
1737 
1738     sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1739 
1740     make_sgl(sgp, segs, nsegs);
1741     sgl_flits = sgl_len(nsegs);
1742 
1743     txqs.gen = q->gen;
1744     txqs.pidx = q->pidx;
1745     txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1746     write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1747         from->wr_hi, from->wr_lo);
1748 }
1749 
1750 /**
1751  *  calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1752  *  @m: the packet
1753  *
1754  *  Returns the number of Tx descriptors needed for the given offload
1755  *  packet.  These packets are already fully constructed.
1756  */
1757 static __inline unsigned int
1758 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
1759 {
1760     unsigned int flits, cnt = 0;
1761 
1762 
1763     if (m->m_len <= WR_LEN)
1764         return 1;                 /* packet fits as immediate data */
1765 
1766     if (m->m_flags & M_IOVEC)
1767         cnt = mtomv(m)->mv_count;
1768 
1769     INT3;       /* XXX: offload descriptor accounting is not implemented in this port; trap here. */
1770     flits = 3;  /* XXX: placeholder flit count. */
1771 
1772     return flits_to_desc(flits + sgl_len(cnt));
1773 }
1774 
1775 /**
1776  *  ofld_xmit - send a packet through an offload queue
1777  *  @adap: the adapter
1778  *  @q: the Tx offload queue
1779  *  @m: the packet
1780  *
1781  *  Send an offload packet through an SGE offload queue.
1782  */
1783 static int
1784 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1785 {
1786     int ret, nsegs;
1787     unsigned int ndesc;
1788     unsigned int pidx, gen;
1789     struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
1790     bus_dma_segment_t segs[TX_MAX_SEGS];
1791     int i, cleaned;
1792     struct tx_sw_desc *stx = &q->sdesc[q->pidx];
1793 
1794     mtx_lock(&q->lock);
1795     if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
1796         mtx_unlock(&q->lock);
1797         return (ret);
1798     }
1799     ndesc = calc_tx_descs_ofld(m, nsegs);
1800 again:  cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
1801 
1802     ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
1803     if (__predict_false(ret)) {
1804         if (ret == 1) {
1805             m_set_priority(m, ndesc);     /* save for restart */
1806             mtx_unlock(&q->lock);
1807             return EINTR;
1808         }
1809         goto again;
1810     }
1811 
1812     gen = q->gen;
1813     q->in_use += ndesc;
1814     pidx = q->pidx;
1815     q->pidx += ndesc;
1816     if (q->pidx >= q->size) {
1817         q->pidx -= q->size;
1818         q->gen ^= 1;
1819     }
1820 #ifdef T3_TRACE
1821     T3_TRACE5(adap->tb[q->cntxt_id & 7],
1822           "ofld_xmit: ndesc %u, pidx %u, len %u, head %u, nsegs %u",
1823           ndesc, pidx, m->m_pkthdr.len, m->m_len, nsegs);
1825 #endif
1826     mtx_unlock(&q->lock);
1827 
1828     write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
1829     check_ring_tx_db(adap, q);
1830 
1831     for (i = 0; i < cleaned; i++) {
1832         m_freem_vec(m_vec[i]);
1833     }
1834     return (0);
1835 }
1836 
1837 /**
1838  *  restart_offloadq - restart a suspended offload queue
1839  *  @qs: the queue set containing the offload queue
1840  *
1841  *  Resumes transmission on a suspended Tx offload queue.
1842  */
1843 static void
1844 restart_offloadq(struct work *wk, void *data)
1845 {
1846 
1847     struct mbuf *m;
1848     struct sge_qset *qs = data;
1849     struct sge_txq *q = &qs->txq[TXQ_OFLD];
1850     adapter_t *adap = qs->port->adapter;
1851     struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
1852     bus_dma_segment_t segs[TX_MAX_SEGS];
1853     int nsegs, i, cleaned;
1854     struct tx_sw_desc *stx = &q->sdesc[q->pidx];
1855 
1856     mtx_lock(&q->lock);
1857 again:  cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
1858 
1859     while ((m = mbufq_peek(&q->sendq)) != NULL) {
1860         unsigned int gen, pidx;
1861         unsigned int ndesc = m_get_priority(m);
1862 
1863         if (__predict_false(q->size - q->in_use < ndesc)) {
1864             setbit(&qs->txq_stopped, TXQ_OFLD);
1865             smp_mb();
1866 
1867             if (should_restart_tx(q) &&
1868                 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1869                 goto again;
1870             q->stops++;
1871             break;
1872         }
1873 
1874         gen = q->gen;
1875         q->in_use += ndesc;
1876         pidx = q->pidx;
1877         q->pidx += ndesc;
1878         if (q->pidx >= q->size) {
1879             q->pidx -= q->size;
1880             q->gen ^= 1;
1881         }
1882 
1883         (void)mbufq_dequeue(&q->sendq);
1884         busdma_map_mbufs(&m, q, stx, segs, &nsegs);
1885         mtx_unlock(&q->lock);
1886         write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
1887         mtx_lock(&q->lock);
1888     }
1889     mtx_unlock(&q->lock);
1890 
1891 #if USE_GTS
1892     set_bit(TXQ_RUNNING, &q->flags);
1893     set_bit(TXQ_LAST_PKT_DB, &q->flags);
1894 #endif
1895     t3_write_reg(adap, A_SG_KDOORBELL,
1896              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1897 
1898     for (i = 0; i < cleaned; i++) {
1899         m_freem_vec(m_vec[i]);
1900     }
1901 }
1902 
1903 /**
1904  *  queue_set - return the queue set a packet should use
1905  *  @m: the packet
1906  *
1907  *  Maps a packet to the SGE queue set it should use.  The desired queue
1908  *  set is carried in bits 1-3 in the packet's priority.
1909  */
1910 static __inline int
1911 queue_set(const struct mbuf *m)
1912 {
1913     return m_get_priority(m) >> 1;
1914 }
1915 
1916 /**
1917  *  is_ctrl_pkt - return whether an offload packet is a control packet
1918  *  @m: the packet
1919  *
1920  *  Determines whether an offload packet should use an OFLD or a CTRL
1921  *  Tx queue.  This is indicated by bit 0 in the packet's priority.
1922  */
1923 static __inline int
1924 is_ctrl_pkt(const struct mbuf *m)
1925 {
1926     return m_get_priority(m) & 1;
1927 }
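/*
 * Illustrative sketch (not compiled): an offload sender is expected to
 * encode the target queue set and the control/data choice in the mbuf
 * priority before handing the packet to t3_offload_tx(), matching the
 * decoding in queue_set() and is_ctrl_pkt() above, e.g.
 *
 *    m_set_priority(m, (qset_idx << 1) | is_ctrl);
 *
 * where qset_idx and is_ctrl are hypothetical names for the desired queue
 * set index and the control-queue flag.
 */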
1928 
1929 /**
1930  *  t3_offload_tx - send an offload packet
1931  *  @tdev: the offload device to send to
1932  *  @m: the packet
1933  *
1934  *  Sends an offload packet.  We use the packet priority to select the
1935  *  appropriate Tx queue as follows: bit 0 indicates whether the packet
1936  *  should be sent as regular or control, bits 1-3 select the queue set.
1937  */
1938 int
1939 t3_offload_tx(struct toedev *tdev, struct mbuf *m)
1940 {
1941     adapter_t *adap = tdev2adap(tdev);
1942     struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
1943 
1944     if (__predict_false(is_ctrl_pkt(m)))
1945         return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
1946 
1947     return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
1948 }
1949 
1950 static void
1951 restart_tx(struct sge_qset *qs)
1952 {
1953     if (isset(&qs->txq_stopped, TXQ_OFLD) &&
1954         should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1955         test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1956         qs->txq[TXQ_OFLD].restarts++;
1957         workqueue_enqueue(qs->txq[TXQ_OFLD].qresume_task.wq, &qs->txq[TXQ_OFLD].qresume_task.w, NULL);
1958     }
1959     if (isset(&qs->txq_stopped, TXQ_CTRL) &&
1960         should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1961         test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1962         qs->txq[TXQ_CTRL].restarts++;
1963         workqueue_enqueue(qs->txq[TXQ_CTRL].qresume_task.wq, &qs->txq[TXQ_CTRL].qresume_task.w, NULL);
1964     }
1965 }
1966 
1967 /**
1968  *  t3_sge_alloc_qset - initialize an SGE queue set
1969  *  @sc: the controller softc
1970  *  @id: the queue set id
1971  *  @nports: how many Ethernet ports will be using this queue set
1972  *  @irq_vec_idx: the IRQ vector index for response queue interrupts
1973  *  @p: configuration parameters for this queue set
1974  *  @ntxq: number of Tx queues for the queue set
1975  *  @pi: port info for queue set
1976  *
1977  *  Allocate resources and initialize an SGE queue set.  A queue set
1978  *  comprises a response queue, two Rx free-buffer queues, and up to 3
1979  *  Tx queues.  The Tx queues are assigned roles in the order Ethernet
1980  *  queue, offload queue, and control queue.
1981  */
1982 int
1983 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
1984           const struct qset_params *p, int ntxq, struct port_info *pi)
1985 {
1986     struct sge_qset *q = &sc->sge.qs[id];
1987     int i, ret = 0;
1988 
1989     init_qset_cntxt(q, id);
1990 
1991     if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
1992             sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
1993             &q->fl[0].desc, &q->fl[0].sdesc,
1994             &q->fl[0].desc_tag, &q->fl[0].desc_map,
1995             sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
1996         goto err;
1997     }
1998 
1999     if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2000             sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2001             &q->fl[1].desc, &q->fl[1].sdesc,
2002             &q->fl[1].desc_tag, &q->fl[1].desc_map,
2003             sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2004         goto err;
2005     }
2006 
2007     if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2008             &q->rspq.phys_addr, &q->rspq.desc, NULL,
2009             &q->rspq.desc_tag, &q->rspq.desc_map,
2010             NULL, NULL)) != 0) {
2011         goto err;
2012     }
2013 
2014     for (i = 0; i < ntxq; ++i) {
2015         /*
2016          * The control queue always uses immediate data so does not
2017          * need to keep track of any mbufs.
2018          * XXX Placeholder for future TOE support.
2019          */
2020         size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2021 
2022         if ((ret = alloc_ring(sc, p->txq_size[i],
2023                 sizeof(struct tx_desc), sz,
2024                 &q->txq[i].phys_addr, &q->txq[i].desc,
2025                 &q->txq[i].sdesc, &q->txq[i].desc_tag,
2026                 &q->txq[i].desc_map,
2027                 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2028             goto err;
2029         }
2030         mbufq_init(&q->txq[i].sendq);
2031         q->txq[i].gen = 1;
2032         q->txq[i].size = p->txq_size[i];
2033         snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2034             0, irq_vec_idx, i);
2035         MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2036     }
2037 
2038     q->txq[TXQ_ETH].port = pi;
2039 
2040     q->txq[TXQ_OFLD].qresume_task.name = "restart_offloadq";
2041     q->txq[TXQ_OFLD].qresume_task.func = restart_offloadq;
2042     q->txq[TXQ_OFLD].qresume_task.context = q;
2043     kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_OFLD].qresume_task, NULL, "cxgb_make_task");
2044 
2045     q->txq[TXQ_CTRL].qresume_task.name = "restart_ctrlq";
2046     q->txq[TXQ_CTRL].qresume_task.func = restart_ctrlq;
2047     q->txq[TXQ_CTRL].qresume_task.context = q;
2048     kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_CTRL].qresume_task, NULL, "cxgb_make_task");
2049 
2050     q->txq[TXQ_ETH].qreclaim_task.name = "sge_txq_reclaim_handler";
2051     q->txq[TXQ_ETH].qreclaim_task.func = sge_txq_reclaim_handler;
2052     q->txq[TXQ_ETH].qreclaim_task.context = &q->txq[TXQ_ETH];
2053     kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_ETH].qreclaim_task, NULL, "cxgb_make_task");
2054 
2055     q->txq[TXQ_OFLD].qreclaim_task.name = "sge_txq_reclaim_handler";
2056     q->txq[TXQ_OFLD].qreclaim_task.func = sge_txq_reclaim_handler;
2057     q->txq[TXQ_OFLD].qreclaim_task.context = &q->txq[TXQ_OFLD];
2058     kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_OFLD].qreclaim_task, NULL, "cxgb_make_task");
2059 
2060     q->fl[0].gen = q->fl[1].gen = 1;
2061     q->fl[0].size = p->fl_size;
2062     q->fl[1].size = p->jumbo_size;
2063 
2064     q->rspq.gen = 1;
2065     q->rspq.cidx = 0;
2066     q->rspq.size = p->rspq_size;
2067 
2068     q->txq[TXQ_ETH].stop_thres = nports *
2069         flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2070 
2071     q->fl[0].buf_size = MCLBYTES;
2072     q->fl[1].buf_size = MJUMPAGESIZE;
2073 
2074     q->lro.enabled = lro_default;
2075 
2076     mtx_lock(&sc->sge.reg_lock);
2077     ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2078                    q->rspq.phys_addr, q->rspq.size,
2079                    q->fl[0].buf_size, 1, 0);
2080     if (ret) {
2081         printf("error %d from t3_sge_init_rspcntxt\n", ret);
2082         goto err_unlock;
2083     }
2084 
2085     for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2086         ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2087                       q->fl[i].phys_addr, q->fl[i].size,
2088                       q->fl[i].buf_size, p->cong_thres, 1,
2089                       0);
2090         if (ret) {
2091             printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2092             goto err_unlock;
2093         }
2094     }
2095 
2096     ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2097                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2098                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2099                  1, 0);
2100     if (ret) {
2101         printf("error %d from t3_sge_init_ecntxt\n", ret);
2102         goto err_unlock;
2103     }
2104 
2105     if (ntxq > 1) {
2106         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2107                      USE_GTS, SGE_CNTXT_OFLD, id,
2108                      q->txq[TXQ_OFLD].phys_addr,
2109                      q->txq[TXQ_OFLD].size, 0, 1, 0);
2110         if (ret) {
2111             printf("error %d from t3_sge_init_ecntxt\n", ret);
2112             goto err_unlock;
2113         }
2114     }
2115 
2116     if (ntxq > 2) {
2117         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2118                      SGE_CNTXT_CTRL, id,
2119                      q->txq[TXQ_CTRL].phys_addr,
2120                      q->txq[TXQ_CTRL].size,
2121                      q->txq[TXQ_CTRL].token, 1, 0);
2122         if (ret) {
2123             printf("error %d from t3_sge_init_ecntxt\n", ret);
2124             goto err_unlock;
2125         }
2126     }
2127 
2128     snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2129         0, irq_vec_idx);
2130     MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2131 
2132     mtx_unlock(&sc->sge.reg_lock);
2133     t3_update_qset_coalesce(q, p);
2134     q->port = pi;
2135 
2136     refill_fl(sc, &q->fl[0], q->fl[0].size);
2137     refill_fl(sc, &q->fl[1], q->fl[1].size);
2138     refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2139 
2140     t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2141              V_NEWTIMER(q->rspq.holdoff_tmr));
2142 
2143     return (0);
2144 
2145 err_unlock:
2146     mtx_unlock(&sc->sge.reg_lock);
2147 err:
2148     t3_free_qset(sc, q);
2149 
2150     return (ret);
2151 }
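/*
 * Illustrative sketch (not compiled): attach code typically allocates one
 * queue set per configured port queue, roughly as below.  The parameter and
 * constant names (params.sge.qset, SGE_TXQ_PER_SET, irq_idx, qset_idx) are
 * assumptions borrowed from the FreeBSD driver and may differ in this port.
 *
 *    for (i = 0, qset_idx = 0; i < sc->params.nports; i++)
 *        for (j = 0; j < sc->port[i].nqsets; j++, qset_idx++)
 *            t3_sge_alloc_qset(sc, qset_idx, sc->params.nports, irq_idx,
 *                &sc->params.sge.qset[qset_idx], SGE_TXQ_PER_SET,
 *                &sc->port[i]);
 */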
2152 
2153 void
2154 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2155 {
2156     struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2157     struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2158     struct ifnet *ifp = pi->ifp;
2159 
2160     DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2161 
2162     /*
2163      * XXX need to add VLAN support for 6.x
2164      */
2165 #ifdef VLAN_SUPPORTED
2166     if (__predict_false(cpl->vlan_valid)) {
2167         m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2168         m->m_flags |= M_VLANTAG;
2169     }
2170 #endif
2171 
2172     m_set_rcvif(m, ifp);
2173     m_explode(m);
2174     /*
2175      * adjust after conversion to mbuf chain
2176      */
2177     m_adj(m, sizeof(*cpl) + ethpad);
2178 
2179     if_percpuq_enqueue(ifp->if_percpuq, m);
2180 }
2181 
2182 /**
2183  *  get_packet - return the next ingress packet buffer from a free list
2184  *  @adap: the adapter that received the packet
2185  *  @drop_thres: # of remaining buffers before we start dropping packets
2186  *  @qs: the qset that the SGE free list holding the packet belongs to
2187  *  @mh: the mbuf header; contains pointers to the head and tail of the mbuf chain
2188  *  @r: the response descriptor
2189  *
2190  *  Get the next packet from a free list and complete setup of the
2191  *  mbuf.  If the packet is small we make a copy and recycle the
2192  *  original buffer, otherwise we use the original buffer itself.  If a
2193  *  positive drop threshold is supplied packets are dropped and their
2194  *  buffers recycled if (a) the number of remaining buffers is under the
2195  *  threshold and the packet is too big to copy, or (b) the packet should
2196  *  be copied but there is no memory for the copy.
2197  */
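/*
 * SOP/EOP handling in both variants below:
 *   RSPQ_SOP_EOP   - complete packet in a single buffer; the chain is started
 *                    and finished in one step and get_packet() returns 1.
 *   RSPQ_SOP       - first buffer of a multi-buffer packet; start a new chain,
 *                    return 0.
 *   RSPQ_NSOP_NEOP - middle buffer; append to the current chain, return 0.
 *   RSPQ_EOP       - last buffer; append and return 1 so the caller can hand
 *                    the completed chain up the stack.
 */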
2198 #ifdef DISABLE_MBUF_IOVEC
2199 
2200 static int
2201 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2202     struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m)
2203 {
2204 
2205     unsigned int len_cq =  ntohl(r->len_cq);
2206     struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2207     struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2208     uint32_t len = G_RSPD_LEN(len_cq);
2209     uint32_t flags = ntohl(r->flags);
2210     uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2211     int ret = 0;
2212 
2213     prefetch(sd->cl);
2214 
2215     fl->credits--;
2216     bus_dmamap_sync(fl->entry_tag, sd->map, 0, len, BUS_DMASYNC_POSTREAD);
2217     bus_dmamap_unload(fl->entry_tag, sd->map);
2218 
2219     m->m_len = len;
2220     m_cljset(m, sd->cl, fl->type);
2221 
2222     switch(sopeop) {
2223     case RSPQ_SOP_EOP:
2224         DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2225         mh->mh_head = mh->mh_tail = m;
2226         m->m_pkthdr.len = len;
2227         m->m_flags |= M_PKTHDR;
2228         ret = 1;
2229         break;
2230     case RSPQ_NSOP_NEOP:
2231         DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2232         m->m_flags &= ~M_PKTHDR;
2233         if (mh->mh_tail == NULL) {
2234             if (cxgb_debug)
2235                 printf("discarding intermediate descriptor entry\n");
2236             m_freem(m);
2237             break;
2238         }
2239         mh->mh_tail->m_next = m;
2240         mh->mh_tail = m;
2241         mh->mh_head->m_pkthdr.len += len;
2242         ret = 0;
2243         break;
2244     case RSPQ_SOP:
2245         DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2246         m->m_pkthdr.len = len;
2247         mh->mh_head = mh->mh_tail = m;
2248         m->m_flags |= M_PKTHDR;
2249         ret = 0;
2250         break;
2251     case RSPQ_EOP:
2252         DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2253         m->m_flags &= ~M_PKTHDR;
2254         mh->mh_head->m_pkthdr.len += len;
2255         mh->mh_tail->m_next = m;
2256         mh->mh_tail = m;
2257         ret = 1;
2258         break;
2259     }
2260     if (++fl->cidx == fl->size)
2261         fl->cidx = 0;
2262 
2263     return (ret);
2264 }
2265 
2266 #else
2267 static int
2268 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2269     struct mbuf *m, struct rsp_desc *r)
2270 {
2271 
2272     unsigned int len_cq =  ntohl(r->len_cq);
2273     struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2274     struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2275     uint32_t len = G_RSPD_LEN(len_cq);
2276     uint32_t flags = ntohl(r->flags);
2277     uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2278     void *cl;
2279     int ret = 0;
2280 
2281     prefetch(sd->cl);
2282 
2283     fl->credits--;
2284     bus_dmamap_sync(fl->entry_tag, sd->map, 0, len, BUS_DMASYNC_POSTREAD);
2285 
2286     if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2287         cl = mtod(m, void *);
2288         memcpy(cl, sd->cl, len);
2289         recycle_rx_buf(adap, fl, fl->cidx);
2290     } else {
2291         cl = sd->cl;
2292         bus_dmamap_unload(fl->entry_tag, sd->map);
2293     }
2294     switch(sopeop) {
2295     case RSPQ_SOP_EOP:
2296         DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2297         m->m_len = m->m_pkthdr.len = len;
2298         if (cl == sd->cl)
2299             m_cljset(m, cl, fl->type);
2300         ret = 1;
2301         goto done;
2302         break;
2303     case RSPQ_NSOP_NEOP:
2304         DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2305         ret = 0;
2306         break;
2307     case RSPQ_SOP:
2308         DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2309         m_iovinit(m);
2310         ret = 0;
2311         break;
2312     case RSPQ_EOP:
2313         DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2314         ret = 1;
2315         break;
2316     }
2317     m_iovappend(m, cl, fl->buf_size, len, 0);
2318 
2319 done:
2320     if (++fl->cidx == fl->size)
2321         fl->cidx = 0;
2322 
2323     return (ret);
2324 }
2325 #endif
2326 /**
2327  *  handle_rsp_cntrl_info - handles control information in a response
2328  *  @qs: the queue set corresponding to the response
2329  *  @flags: the response control flags
2330  *
2331  *  Handles the control information of an SGE response, such as GTS
2332  *  indications and completion credits for the queue set's Tx queues.
2333  *  HW coalesces credits, we don't do any extra SW coalescing.
2334  */
2335 static __inline void
2336 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2337 {
2338     unsigned int credits;
2339 
2340 #if USE_GTS
2341     if (flags & F_RSPD_TXQ0_GTS)
2342         clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2343 #endif
2344     credits = G_RSPD_TXQ0_CR(flags);
2345     if (credits) {
2346         qs->txq[TXQ_ETH].processed += credits;
2347         if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
2348             workqueue_enqueue(qs->port->timer_reclaim_task.wq,
2349                              &qs->port->timer_reclaim_task.w, NULL);
2350     }
2351 
2352     credits = G_RSPD_TXQ2_CR(flags);
2353     if (credits)
2354         qs->txq[TXQ_CTRL].processed += credits;
2355 
2356 # if USE_GTS
2357     if (flags & F_RSPD_TXQ1_GTS)
2358         clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2359 # endif
2360     credits = G_RSPD_TXQ1_CR(flags);
2361     if (credits)
2362         qs->txq[TXQ_OFLD].processed += credits;
2363 }
2364 
2365 static void
2366 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2367     unsigned int sleeping)
2368 {
2369     ;   /* XXX: stub; ringing the doorbells for sleeping Tx queues is not implemented. */
2370 }
2371 
2372 /**
2373  *  process_responses - process responses from an SGE response queue
2374  *  @adap: the adapter
2375  *  @qs: the queue set to which the response queue belongs
2376  *  @budget: how many responses can be processed in this round
2377  *
2378  *  Process responses from an SGE response queue up to the supplied budget.
2379  *  Responses include received packets as well as credits and other events
2380  *  for the queues that belong to the response queue's queue set.
2381  *  A negative budget is effectively unlimited.
2382  *
2383  *  Additionally choose the interrupt holdoff time for the next interrupt
2384  *  on this queue.  If the system is under memory shortage use a fairly
2385  *  long delay to help recovery.
2386  */
2387 static int
2388 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2389 {
2390     struct sge_rspq *rspq = &qs->rspq;
2391     struct rsp_desc *r = &rspq->desc[rspq->cidx];
2392     int budget_left = budget;
2393     unsigned int sleeping = 0;
2394     int lro = qs->lro.enabled;
2395 #ifdef DEBUG
2396     static int last_holdoff = 0;
2397     if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2398         printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2399         last_holdoff = rspq->holdoff_tmr;
2400     }
2401 #endif
2402     rspq->next_holdoff = rspq->holdoff_tmr;
2403 
2404     while (__predict_true(budget_left && is_new_response(r, rspq))) {
2405         int eth, eop = 0, ethpad = 0;
2406         uint32_t flags = ntohl(r->flags);
2407         uint32_t rss_csum = *(const uint32_t *)r;
2408         uint32_t rss_hash = r->rss_hdr.rss_hash_val;
2409 
2410         eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2411 
2412         if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2413             /* XXX */
2414         } else if (flags & F_RSPD_IMM_DATA_VALID) {
2415 #ifdef DISABLE_MBUF_IOVEC
2416             if (cxgb_debug)
2417                 printf("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", r->rss_hdr.opcode, rspq->cidx);
2418 
2419             if(get_imm_packet(adap, r, &rspq->rspq_mh) == 0) {
2420                 rspq->next_holdoff = NOMEM_INTR_DELAY;
2421                 budget_left--;
2422                 break;
2423             } else {
2424                 eop = 1;
2425             }
2426 #else
2427             struct mbuf *m = NULL;
2428 
2429             if (rspq->rspq_mbuf == NULL)
2430                 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
2431             else
2432                 m = m_gethdr(M_DONTWAIT, MT_DATA);
2433 
2434             /*
2435              * XXX revisit me
2436              */
2437             if (rspq->rspq_mbuf == NULL &&  m == NULL) {
2438                 rspq->next_holdoff = NOMEM_INTR_DELAY;
2439                 budget_left--;
2440                 break;
2441             }
2442             if (get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags))
2443                 goto skip;
2444             eop = 1;
2445 #endif
2446             rspq->imm_data++;
2447         } else if (r->len_cq) {
2448             int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2449 
2450 #ifdef DISABLE_MBUF_IOVEC
2451             struct mbuf *m;
2452             m = m_gethdr(M_NOWAIT, MT_DATA);
2453 
2454             if (m == NULL) {
2455                 log(LOG_WARNING, "failed to get mbuf for packet\n");
2456                 break;
2457             }
2458 
2459             eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m);
2460 #else
2461             if (rspq->rspq_mbuf == NULL)
2462                 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
2463             if (rspq->rspq_mbuf == NULL) {
2464                 log(LOG_WARNING, "failed to get mbuf for packet\n");
2465                 break;
2466             }
2467             eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r);
2468 #endif
2469             ethpad = 2;
2470         } else {
2471             DPRINTF("pure response\n");
2472             rspq->pure_rsps++;
2473         }
2474 
2475         if (flags & RSPD_CTRL_MASK) {
2476             sleeping |= flags & RSPD_GTS_MASK;
2477             handle_rsp_cntrl_info(qs, flags);
2478         }
2479 #ifndef DISABLE_MBUF_IOVEC
2480     skip:
2481 #endif
2482         r++;
2483         if (__predict_false(++rspq->cidx == rspq->size)) {
2484             rspq->cidx = 0;
2485             rspq->gen ^= 1;
2486             r = rspq->desc;
2487         }
2488 
2489         prefetch(r);
2490         if (++rspq->credits >= (rspq->size / 4)) {
2491             refill_rspq(adap, rspq, rspq->credits);
2492             rspq->credits = 0;
2493         }
2494 
2495         if (eop) {
2496             prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
2497             prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);
2498 
2499             if (eth) {
2500                 t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,
2501                     rss_hash, rss_csum, lro);
2502 
2503                 rspq->rspq_mh.mh_head = NULL;
2504             } else {
2505                 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2506                 /*
2507                  * XXX size mismatch
2508                  */
2509                 m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2510             }
2511             __refill_fl(adap, &qs->fl[0]);
2512             __refill_fl(adap, &qs->fl[1]);
2513 
2514         }
2515         --budget_left;
2516     }
2517 
2518     t3_lro_flush(adap, qs, &qs->lro);
2519 
2520     if (sleeping)
2521         check_ring_db(adap, qs, sleeping);
2522 
2523     smp_mb();  /* commit Tx queue processed updates */
2524     if (__predict_false(qs->txq_stopped != 0))
2525         restart_tx(qs);
2526 
2527     budget -= budget_left;
2528     return (budget);
2529 }
2530 
2531 /*
2532  * A helper function that processes responses and issues GTS.
2533  */
2534 static __inline int
2535 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2536 {
2537     int work;
2538     static int last_holdoff = 0;
2539 
2540     work = process_responses(adap, rspq_to_qset(rq), -1);
2541 
2542     if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2543         printf("next_holdoff=%d\n", rq->next_holdoff);
2544         last_holdoff = rq->next_holdoff;
2545     }
2546     if (work)
2547         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2548             V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2549     return work;
2550 }
2551 
2552 
2553 /*
2554  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2555  * Handles data events from SGE response queues as well as error and other
2556  * async events as they all use the same interrupt pin.  We use one SGE
2557  * response queue per port in this mode and protect all response queues with
2558  * queue 0's lock.
2559  */
2560 int
2561 t3b_intr(void *data)
2562 {
2563     uint32_t i, map;
2564     adapter_t *adap = data;
2565     struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2566 
2567     t3_write_reg(adap, A_PL_CLI, 0);
2568     map = t3_read_reg(adap, A_SG_DATA_INTR);
2569 
2570     if (!map)
2571         return (FALSE);
2572 
2573     if (__predict_false(map & F_ERRINTR))
2574         workqueue_enqueue(adap->slow_intr_task.wq, &adap->slow_intr_task.w, NULL);
2575 
2576     mtx_lock(&q0->lock);
2577     for_each_port(adap, i)
2578         if (map & (1 << i))
2579             process_responses_gts(adap, &adap->sge.qs[i].rspq);
2580     mtx_unlock(&q0->lock);
2581 
2582     return (TRUE);
2583 }
2584 
2585 /*
2586  * The MSI interrupt handler.  This needs to handle data events from SGE
2587  * response queues as well as error and other async events as they all use
2588  * the same MSI vector.  We use one SGE response queue per port in this mode
2589  * and protect all response queues with queue 0's lock.
2590  */
2591 int
2592 t3_intr_msi(void *data)
2593 {
2594     adapter_t *adap = data;
2595     struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2596     int i, new_packets = 0;
2597 
2598     mtx_lock(&q0->lock);
2599 
2600     for_each_port(adap, i)
2601         if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
2602             new_packets = 1;
2603     mtx_unlock(&q0->lock);
2604     if (new_packets == 0)
2605         workqueue_enqueue(adap->slow_intr_task.wq, &adap->slow_intr_task.w, NULL);
2606 
2607     return (TRUE);
2608 }
2609 
2610 int
2611 t3_intr_msix(void *data)
2612 {
2613     struct sge_qset *qs = data;
2614     adapter_t *adap = qs->port->adapter;
2615     struct sge_rspq *rspq = &qs->rspq;
2616 
2617     mtx_lock(&rspq->lock);
2618     if (process_responses_gts(adap, rspq) == 0)
2619         rspq->unhandled_irqs++;
2620     mtx_unlock(&rspq->lock);
2621 
2622     return (TRUE);
2623 }
2624 
2625 /**
2626  *  t3_get_desc - dump an SGE descriptor for debugging purposes
2627  *  @qs: the queue set
2628  *  @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
2629  *  @idx: the descriptor index in the queue
2630  *  @data: where to dump the descriptor contents
2631  *
2632  *  Dumps the contents of a HW descriptor of an SGE queue.  Returns the
2633  *  size of the descriptor.
2634  */
2635 int
2636 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2637         unsigned char *data)
2638 {
2639     if (qnum >= 6)
2640         return (EINVAL);
2641 
2642     if (qnum < 3) {
2643         if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
2644             return (EINVAL);
2645         memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2646         return sizeof(struct tx_desc);
2647     }
2648 
2649     if (qnum == 3) {
2650         if (!qs->rspq.desc || idx >= qs->rspq.size)
2651             return (EINVAL);
2652         memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2653         return sizeof(struct rsp_desc);
2654     }
2655 
2656     qnum -= 4;
2657     if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2658         return (EINVAL);
2659     memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2660     return sizeof(struct rx_desc);
2661 }
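/*
 * Illustrative sketch (not compiled): dumping the current descriptor of a
 * queue set's Ethernet Tx queue, assuming TXQ_ETH is index 0 as the role
 * ordering described in t3_sge_alloc_qset() suggests.  The return value is
 * the descriptor size, or EINVAL for a bad queue number or index.
 *
 *    unsigned char buf[sizeof(struct tx_desc)];
 *    int len = t3_get_desc(qs, 0, qs->txq[TXQ_ETH].cidx, buf);
 *    if (len == sizeof(struct tx_desc))
 *        ... examine the descriptor bytes in buf ...
 */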
2662