xref: /qemu/hw/net/rocker/rocker.c (revision c6bd8c70)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "hw/hw.h"
19 #include "hw/pci/pci.h"
20 #include "hw/pci/msix.h"
21 #include "net/net.h"
22 #include "net/eth.h"
23 #include "qemu/iov.h"
24 #include "qemu/bitops.h"
25 #include "qmp-commands.h"
26 
27 #include "rocker.h"
28 #include "rocker_hw.h"
29 #include "rocker_fp.h"
30 #include "rocker_desc.h"
31 #include "rocker_tlv.h"
32 #include "rocker_world.h"
33 #include "rocker_of_dpa.h"
34 
/*
 * Device state for one rocker switch instance.
 *
 * name, switch_id and fp_ports are the values reported back by
 * qmp_query_rocker(); instances are chained through 'next' on the
 * file-scope 'rockers' list that rocker_find() walks.
 */
struct rocker {
    /* private */
    PCIDevice parent_obj;
    /* public */

    MemoryRegion mmio;
    MemoryRegion msix_bar;

    /* switch configuration */
    char *name;                  /* switch name */
    uint32_t fp_ports;           /* front-panel port count */
    NICPeers *fp_ports_peers;
    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
    uint64_t switch_id;          /* switch id */

    /* front-panel ports; the code iterates entries [0, fp_ports) */
    FpPort *fp_port[ROCKER_FP_PORTS_MAX];

    /* register backings */
    uint32_t test_reg;
    uint64_t test_reg64;
    dma_addr_t test_dma_addr;
    uint32_t test_dma_size;
    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */

    /* desc rings, indexed by ring number */
    DescRing **rings;

    /* switch worlds, indexed by enum rocker_world_type */
    World *worlds[ROCKER_WORLD_TYPE_MAX];
    World *world_dflt;

    /* linkage for the 'rockers' list */
    QLIST_ENTRY(rocker) next;
};
69 
/* Type-name string handed to OBJECT_CHECK() by to_rocker() */
#define ROCKER "rocker"

/* Checked conversion of an object pointer to Rocker * */
#define to_rocker(obj) \
    OBJECT_CHECK(Rocker, (obj), ROCKER)

/* List of rocker instances; rocker_find() searches it by name */
static QLIST_HEAD(, rocker) rockers;
76 
77 Rocker *rocker_find(const char *name)
78 {
79     Rocker *r;
80 
81     QLIST_FOREACH(r, &rockers, next)
82         if (strcmp(r->name, name) == 0) {
83             return r;
84         }
85 
86     return NULL;
87 }
88 
89 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
90 {
91     if (type < ROCKER_WORLD_TYPE_MAX) {
92         return r->worlds[type];
93     }
94     return NULL;
95 }
96 
97 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
98 {
99     RockerSwitch *rocker = g_malloc0(sizeof(*rocker));
100     Rocker *r;
101 
102     r = rocker_find(name);
103     if (!r) {
104         error_set(errp, ERROR_CLASS_GENERIC_ERROR,
105                   "rocker %s not found", name);
106         return NULL;
107     }
108 
109     rocker->name = g_strdup(r->name);
110     rocker->id = r->switch_id;
111     rocker->ports = r->fp_ports;
112 
113     return rocker;
114 }
115 
116 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
117 {
118     RockerPortList *list = NULL;
119     Rocker *r;
120     int i;
121 
122     r = rocker_find(name);
123     if (!r) {
124         error_set(errp, ERROR_CLASS_GENERIC_ERROR,
125                   "rocker %s not found", name);
126         return NULL;
127     }
128 
129     for (i = r->fp_ports - 1; i >= 0; i--) {
130         RockerPortList *info = g_malloc0(sizeof(*info));
131         info->value = g_malloc0(sizeof(*info->value));
132         struct fp_port *port = r->fp_port[i];
133 
134         fp_port_get_info(port, info);
135         info->next = list;
136         list = info;
137     }
138 
139     return list;
140 }
141 
142 uint32_t rocker_fp_ports(Rocker *r)
143 {
144     return r->fp_ports;
145 }
146 
147 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
148                                             DescRing *ring)
149 {
150     return (desc_ring_index(ring) - 2) / 2 + 1;
151 }
152 
153 static int tx_consume(Rocker *r, DescInfo *info)
154 {
155     PCIDevice *dev = PCI_DEVICE(r);
156     char *buf = desc_get_buf(info, true);
157     RockerTlv *tlv_frag;
158     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
159     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
160     uint32_t pport;
161     uint32_t port;
162     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
163     uint16_t tx_l3_csum_off = 0;
164     uint16_t tx_tso_mss = 0;
165     uint16_t tx_tso_hdr_len = 0;
166     int iovcnt = 0;
167     int err = ROCKER_OK;
168     int rem;
169     int i;
170 
171     if (!buf) {
172         return -ROCKER_ENXIO;
173     }
174 
175     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
176 
177     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
178         return -ROCKER_EINVAL;
179     }
180 
181     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
182     if (!fp_port_from_pport(pport, &port)) {
183         return -ROCKER_EINVAL;
184     }
185 
186     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
187         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
188     }
189 
190     switch (tx_offload) {
191     case ROCKER_TX_OFFLOAD_L3_CSUM:
192         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
193             return -ROCKER_EINVAL;
194         }
195     case ROCKER_TX_OFFLOAD_TSO:
196         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
197             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
198             return -ROCKER_EINVAL;
199         }
200     }
201 
202     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
203         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
204     }
205 
206     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
207         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
208     }
209 
210     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
211         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
212     }
213 
214     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
215         hwaddr frag_addr;
216         uint16_t frag_len;
217 
218         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
219             err = -ROCKER_EINVAL;
220             goto err_bad_attr;
221         }
222 
223         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
224 
225         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
226             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
227             err = -ROCKER_EINVAL;
228             goto err_bad_attr;
229         }
230 
231         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
232         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
233 
234         iov[iovcnt].iov_len = frag_len;
235         iov[iovcnt].iov_base = g_malloc(frag_len);
236         if (!iov[iovcnt].iov_base) {
237             err = -ROCKER_ENOMEM;
238             goto err_no_mem;
239         }
240 
241         if (pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
242                      iov[iovcnt].iov_len)) {
243             err = -ROCKER_ENXIO;
244             goto err_bad_io;
245         }
246 
247         if (++iovcnt > ROCKER_TX_FRAGS_MAX) {
248             goto err_too_many_frags;
249         }
250     }
251 
252     if (iovcnt) {
253         /* XXX perform Tx offloads */
254         /* XXX   silence compiler for now */
255         tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
256     }
257 
258     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
259 
260 err_too_many_frags:
261 err_bad_io:
262 err_no_mem:
263 err_bad_attr:
264     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
265         if (iov[i].iov_base) {
266             g_free(iov[i].iov_base);
267         }
268     }
269 
270     return err;
271 }
272 
273 static int cmd_get_port_settings(Rocker *r,
274                                  DescInfo *info, char *buf,
275                                  RockerTlv *cmd_info_tlv)
276 {
277     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
278     RockerTlv *nest;
279     FpPort *fp_port;
280     uint32_t pport;
281     uint32_t port;
282     uint32_t speed;
283     uint8_t duplex;
284     uint8_t autoneg;
285     uint8_t learning;
286     char *phys_name;
287     MACAddr macaddr;
288     enum rocker_world_type mode;
289     size_t tlv_size;
290     int pos;
291     int err;
292 
293     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
294                             cmd_info_tlv);
295 
296     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
297         return -ROCKER_EINVAL;
298     }
299 
300     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
301     if (!fp_port_from_pport(pport, &port)) {
302         return -ROCKER_EINVAL;
303     }
304     fp_port = r->fp_port[port];
305 
306     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
307     if (err) {
308         return err;
309     }
310 
311     fp_port_get_macaddr(fp_port, &macaddr);
312     mode = world_type(fp_port_get_world(fp_port));
313     learning = fp_port_get_learning(fp_port);
314     phys_name = fp_port_get_name(fp_port);
315 
316     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
317                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
318                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
319                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
320                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
321                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
322                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
323                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
324                rocker_tlv_total_size(strlen(phys_name));
325 
326     if (tlv_size > desc_buf_size(info)) {
327         return -ROCKER_EMSGSIZE;
328     }
329 
330     pos = 0;
331     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
332     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
333     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
334     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
335     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
336     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
337                    sizeof(macaddr.a), macaddr.a);
338     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
339     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
340                       learning);
341     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
342                    strlen(phys_name), phys_name);
343     rocker_tlv_nest_end(buf, &pos, nest);
344 
345     return desc_set_buf(info, tlv_size);
346 }
347 
348 static int cmd_set_port_settings(Rocker *r,
349                                  RockerTlv *cmd_info_tlv)
350 {
351     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
352     FpPort *fp_port;
353     uint32_t pport;
354     uint32_t port;
355     uint32_t speed;
356     uint8_t duplex;
357     uint8_t autoneg;
358     uint8_t learning;
359     MACAddr macaddr;
360     enum rocker_world_type mode;
361     int err;
362 
363     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
364                             cmd_info_tlv);
365 
366     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
367         return -ROCKER_EINVAL;
368     }
369 
370     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
371     if (!fp_port_from_pport(pport, &port)) {
372         return -ROCKER_EINVAL;
373     }
374     fp_port = r->fp_port[port];
375 
376     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
377         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
378         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
379 
380         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
381         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
382         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
383 
384         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
385         if (err) {
386             return err;
387         }
388     }
389 
390     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
391         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
392             sizeof(macaddr.a)) {
393             return -ROCKER_EINVAL;
394         }
395         memcpy(macaddr.a,
396                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
397                sizeof(macaddr.a));
398         fp_port_set_macaddr(fp_port, &macaddr);
399     }
400 
401     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
402         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
403         fp_port_set_world(fp_port, r->worlds[mode]);
404     }
405 
406     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
407         learning =
408             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
409         fp_port_set_learning(fp_port, learning);
410     }
411 
412     return ROCKER_OK;
413 }
414 
415 static int cmd_consume(Rocker *r, DescInfo *info)
416 {
417     char *buf = desc_get_buf(info, false);
418     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
419     RockerTlv *info_tlv;
420     World *world;
421     uint16_t cmd;
422     int err;
423 
424     if (!buf) {
425         return -ROCKER_ENXIO;
426     }
427 
428     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
429 
430     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
431         return -ROCKER_EINVAL;
432     }
433 
434     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
435     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
436 
437     /* This might be reworked to something like this:
438      * Every world will have an array of command handlers from
439      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
440      * up to each world to implement whatever command it want.
441      * It can reference "generic" commands as cmd_set_port_settings or
442      * cmd_get_port_settings
443      */
444 
445     switch (cmd) {
446     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
447     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
448     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
449     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
450     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
451     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
452     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
453     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
454         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
455         err = world_do_cmd(world, info, buf, cmd, info_tlv);
456         break;
457     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
458         err = cmd_get_port_settings(r, info, buf, info_tlv);
459         break;
460     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
461         err = cmd_set_port_settings(r, info_tlv);
462         break;
463     default:
464         err = -ROCKER_EINVAL;
465         break;
466     }
467 
468     return err;
469 }
470 
471 static void rocker_msix_irq(Rocker *r, unsigned vector)
472 {
473     PCIDevice *dev = PCI_DEVICE(r);
474 
475     DPRINTF("MSI-X notify request for vector %d\n", vector);
476     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
477         DPRINTF("incorrect vector %d\n", vector);
478         return;
479     }
480     msix_notify(dev, vector);
481 }
482 
483 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
484 {
485     DescRing *ring = r->rings[ROCKER_RING_EVENT];
486     DescInfo *info = desc_ring_fetch_desc(ring);
487     RockerTlv *nest;
488     char *buf;
489     size_t tlv_size;
490     int pos;
491     int err;
492 
493     if (!info) {
494         return -ROCKER_ENOBUFS;
495     }
496 
497     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
498                rocker_tlv_total_size(0) +                 /* nest */
499                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
500                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
501 
502     if (tlv_size > desc_buf_size(info)) {
503         err = -ROCKER_EMSGSIZE;
504         goto err_too_big;
505     }
506 
507     buf = desc_get_buf(info, false);
508     if (!buf) {
509         err = -ROCKER_ENOMEM;
510         goto err_no_mem;
511     }
512 
513     pos = 0;
514     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
515                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
516     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
517     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
518     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
519                       link_up ? 1 : 0);
520     rocker_tlv_nest_end(buf, &pos, nest);
521 
522     err = desc_set_buf(info, tlv_size);
523 
524 err_too_big:
525 err_no_mem:
526     if (desc_ring_post_desc(ring, err)) {
527         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
528     }
529 
530     return err;
531 }
532 
533 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
534                                uint16_t vlan_id)
535 {
536     DescRing *ring = r->rings[ROCKER_RING_EVENT];
537     DescInfo *info;
538     FpPort *fp_port;
539     uint32_t port;
540     RockerTlv *nest;
541     char *buf;
542     size_t tlv_size;
543     int pos;
544     int err;
545 
546     if (!fp_port_from_pport(pport, &port)) {
547         return -ROCKER_EINVAL;
548     }
549     fp_port = r->fp_port[port];
550     if (!fp_port_get_learning(fp_port)) {
551         return ROCKER_OK;
552     }
553 
554     info = desc_ring_fetch_desc(ring);
555     if (!info) {
556         return -ROCKER_ENOBUFS;
557     }
558 
559     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
560                rocker_tlv_total_size(0) +                 /* nest */
561                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
562                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
563                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
564 
565     if (tlv_size > desc_buf_size(info)) {
566         err = -ROCKER_EMSGSIZE;
567         goto err_too_big;
568     }
569 
570     buf = desc_get_buf(info, false);
571     if (!buf) {
572         err = -ROCKER_ENOMEM;
573         goto err_no_mem;
574     }
575 
576     pos = 0;
577     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
578                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
579     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
580     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
581     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
582     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
583     rocker_tlv_nest_end(buf, &pos, nest);
584 
585     err = desc_set_buf(info, tlv_size);
586 
587 err_too_big:
588 err_no_mem:
589     if (desc_ring_post_desc(ring, err)) {
590         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
591     }
592 
593     return err;
594 }
595 
596 static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
597                                                      uint32_t pport)
598 {
599     return r->rings[(pport - 1) * 2 + 3];
600 }
601 
602 int rx_produce(World *world, uint32_t pport,
603                const struct iovec *iov, int iovcnt)
604 {
605     Rocker *r = world_rocker(world);
606     PCIDevice *dev = (PCIDevice *)r;
607     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
608     DescInfo *info = desc_ring_fetch_desc(ring);
609     char *data;
610     size_t data_size = iov_size(iov, iovcnt);
611     char *buf;
612     uint16_t rx_flags = 0;
613     uint16_t rx_csum = 0;
614     size_t tlv_size;
615     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
616     hwaddr frag_addr;
617     uint16_t frag_max_len;
618     int pos;
619     int err;
620 
621     if (!info) {
622         return -ROCKER_ENOBUFS;
623     }
624 
625     buf = desc_get_buf(info, false);
626     if (!buf) {
627         err = -ROCKER_ENXIO;
628         goto out;
629     }
630     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
631 
632     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
633         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
634         err = -ROCKER_EINVAL;
635         goto out;
636     }
637 
638     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
639     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
640 
641     if (data_size > frag_max_len) {
642         err = -ROCKER_EMSGSIZE;
643         goto out;
644     }
645 
646     /* XXX calc rx flags/csum */
647 
648     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
649                rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
650                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
651                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
652                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
653 
654     if (tlv_size > desc_buf_size(info)) {
655         err = -ROCKER_EMSGSIZE;
656         goto out;
657     }
658 
659     /* TODO:
660      * iov dma write can be optimized in similar way e1000 does it in
661      * e1000_receive_iov. But maybe if would make sense to introduce
662      * generic helper iov_dma_write.
663      */
664 
665     data = g_malloc(data_size);
666     if (!data) {
667         err = -ROCKER_ENOMEM;
668         goto out;
669     }
670     iov_to_buf(iov, iovcnt, 0, data, data_size);
671     pci_dma_write(dev, frag_addr, data, data_size);
672     g_free(data);
673 
674     pos = 0;
675     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
676     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
677     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
678     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
679     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
680 
681     err = desc_set_buf(info, tlv_size);
682 
683 out:
684     if (desc_ring_post_desc(ring, err)) {
685         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
686     }
687 
688     return err;
689 }
690 
691 int rocker_port_eg(Rocker *r, uint32_t pport,
692                    const struct iovec *iov, int iovcnt)
693 {
694     FpPort *fp_port;
695     uint32_t port;
696 
697     if (!fp_port_from_pport(pport, &port)) {
698         return -ROCKER_EINVAL;
699     }
700 
701     fp_port = r->fp_port[port];
702 
703     return fp_port_eg(fp_port, iov, iovcnt);
704 }
705 
706 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
707 {
708     PCIDevice *dev = PCI_DEVICE(r);
709     char *buf;
710     int i;
711 
712     buf = g_malloc(r->test_dma_size);
713 
714     if (!buf) {
715         DPRINTF("test dma buffer alloc failed");
716         return;
717     }
718 
719     switch (val) {
720     case ROCKER_TEST_DMA_CTRL_CLEAR:
721         memset(buf, 0, r->test_dma_size);
722         break;
723     case ROCKER_TEST_DMA_CTRL_FILL:
724         memset(buf, 0x96, r->test_dma_size);
725         break;
726     case ROCKER_TEST_DMA_CTRL_INVERT:
727         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
728         for (i = 0; i < r->test_dma_size; i++) {
729             buf[i] = ~buf[i];
730         }
731         break;
732     default:
733         DPRINTF("not test dma control val=0x%08x\n", val);
734         goto err_out;
735     }
736     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
737 
738     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
739 
740 err_out:
741     g_free(buf);
742 }
743 
744 static void rocker_reset(DeviceState *dev);
745 
746 static void rocker_control(Rocker *r, uint32_t val)
747 {
748     if (val & ROCKER_CONTROL_RESET) {
749         rocker_reset(DEVICE(r));
750     }
751 }
752 
753 static int rocker_pci_ring_count(Rocker *r)
754 {
755     /* There are:
756      * - command ring
757      * - event ring
758      * - tx and rx ring per each port
759      */
760     return 2 + (2 * r->fp_ports);
761 }
762 
763 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
764 {
765     hwaddr start = ROCKER_DMA_DESC_BASE;
766     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
767 
768     return addr >= start && addr < end;
769 }
770 
771 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
772 {
773     int i;
774     bool old_enabled;
775     bool new_enabled;
776     FpPort *fp_port;
777 
778     for (i = 0; i < r->fp_ports; i++) {
779         fp_port = r->fp_port[i];
780         old_enabled = fp_port_enabled(fp_port);
781         new_enabled = (new >> (i + 1)) & 0x1;
782         if (new_enabled == old_enabled) {
783             continue;
784         }
785         if (new_enabled) {
786             fp_port_enable(r->fp_port[i]);
787         } else {
788             fp_port_disable(r->fp_port[i]);
789         }
790     }
791 }
792 
793 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
794 {
795     Rocker *r = opaque;
796 
797     if (rocker_addr_is_desc_reg(r, addr)) {
798         unsigned index = ROCKER_RING_INDEX(addr);
799         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
800 
801         switch (offset) {
802         case ROCKER_DMA_DESC_ADDR_OFFSET:
803             r->lower32 = (uint64_t)val;
804             break;
805         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
806             desc_ring_set_base_addr(r->rings[index],
807                                     ((uint64_t)val) << 32 | r->lower32);
808             r->lower32 = 0;
809             break;
810         case ROCKER_DMA_DESC_SIZE_OFFSET:
811             desc_ring_set_size(r->rings[index], val);
812             break;
813         case ROCKER_DMA_DESC_HEAD_OFFSET:
814             if (desc_ring_set_head(r->rings[index], val)) {
815                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
816             }
817             break;
818         case ROCKER_DMA_DESC_CTRL_OFFSET:
819             desc_ring_set_ctrl(r->rings[index], val);
820             break;
821         case ROCKER_DMA_DESC_CREDITS_OFFSET:
822             if (desc_ring_ret_credits(r->rings[index], val)) {
823                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
824             }
825             break;
826         default:
827             DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
828                     " val=0x%08x (ring %d, addr=0x%02x)\n",
829                     addr, val, index, offset);
830             break;
831         }
832         return;
833     }
834 
835     switch (addr) {
836     case ROCKER_TEST_REG:
837         r->test_reg = val;
838         break;
839     case ROCKER_TEST_REG64:
840     case ROCKER_TEST_DMA_ADDR:
841     case ROCKER_PORT_PHYS_ENABLE:
842         r->lower32 = (uint64_t)val;
843         break;
844     case ROCKER_TEST_REG64 + 4:
845         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
846         r->lower32 = 0;
847         break;
848     case ROCKER_TEST_IRQ:
849         rocker_msix_irq(r, val);
850         break;
851     case ROCKER_TEST_DMA_SIZE:
852         r->test_dma_size = val;
853         break;
854     case ROCKER_TEST_DMA_ADDR + 4:
855         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
856         r->lower32 = 0;
857         break;
858     case ROCKER_TEST_DMA_CTRL:
859         rocker_test_dma_ctrl(r, val);
860         break;
861     case ROCKER_CONTROL:
862         rocker_control(r, val);
863         break;
864     case ROCKER_PORT_PHYS_ENABLE + 4:
865         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
866         r->lower32 = 0;
867         break;
868     default:
869         DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
870                 " val=0x%08x\n", addr, val);
871         break;
872     }
873 }
874 
875 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
876 {
877     Rocker *r = opaque;
878 
879     if (rocker_addr_is_desc_reg(r, addr)) {
880         unsigned index = ROCKER_RING_INDEX(addr);
881         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
882 
883         switch (offset) {
884         case ROCKER_DMA_DESC_ADDR_OFFSET:
885             desc_ring_set_base_addr(r->rings[index], val);
886             break;
887         default:
888             DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
889                     " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
890                     addr, val, index, offset);
891             break;
892         }
893         return;
894     }
895 
896     switch (addr) {
897     case ROCKER_TEST_REG64:
898         r->test_reg64 = val;
899         break;
900     case ROCKER_TEST_DMA_ADDR:
901         r->test_dma_addr = val;
902         break;
903     case ROCKER_PORT_PHYS_ENABLE:
904         rocker_port_phys_enable_write(r, val);
905         break;
906     default:
907         DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
908                 " val=0x" TARGET_FMT_plx "\n", addr, val);
909         break;
910     }
911 }
912 
913 #ifdef DEBUG_ROCKER
914 #define regname(reg) case (reg): return #reg
915 static const char *rocker_reg_name(void *opaque, hwaddr addr)
916 {
917     Rocker *r = opaque;
918 
919     if (rocker_addr_is_desc_reg(r, addr)) {
920         unsigned index = ROCKER_RING_INDEX(addr);
921         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
922         static char buf[100];
923         char ring_name[10];
924 
925         switch (index) {
926         case 0:
927             sprintf(ring_name, "cmd");
928             break;
929         case 1:
930             sprintf(ring_name, "event");
931             break;
932         default:
933             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
934                     (index - 2) / 2);
935         }
936 
937         switch (offset) {
938         case ROCKER_DMA_DESC_ADDR_OFFSET:
939             sprintf(buf, "Ring[%s] ADDR", ring_name);
940             return buf;
941         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
942             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
943             return buf;
944         case ROCKER_DMA_DESC_SIZE_OFFSET:
945             sprintf(buf, "Ring[%s] SIZE", ring_name);
946             return buf;
947         case ROCKER_DMA_DESC_HEAD_OFFSET:
948             sprintf(buf, "Ring[%s] HEAD", ring_name);
949             return buf;
950         case ROCKER_DMA_DESC_TAIL_OFFSET:
951             sprintf(buf, "Ring[%s] TAIL", ring_name);
952             return buf;
953         case ROCKER_DMA_DESC_CTRL_OFFSET:
954             sprintf(buf, "Ring[%s] CTRL", ring_name);
955             return buf;
956         case ROCKER_DMA_DESC_CREDITS_OFFSET:
957             sprintf(buf, "Ring[%s] CREDITS", ring_name);
958             return buf;
959         default:
960             sprintf(buf, "Ring[%s] ???", ring_name);
961             return buf;
962         }
963     } else {
964         switch (addr) {
965             regname(ROCKER_BOGUS_REG0);
966             regname(ROCKER_BOGUS_REG1);
967             regname(ROCKER_BOGUS_REG2);
968             regname(ROCKER_BOGUS_REG3);
969             regname(ROCKER_TEST_REG);
970             regname(ROCKER_TEST_REG64);
971             regname(ROCKER_TEST_REG64+4);
972             regname(ROCKER_TEST_IRQ);
973             regname(ROCKER_TEST_DMA_ADDR);
974             regname(ROCKER_TEST_DMA_ADDR+4);
975             regname(ROCKER_TEST_DMA_SIZE);
976             regname(ROCKER_TEST_DMA_CTRL);
977             regname(ROCKER_CONTROL);
978             regname(ROCKER_PORT_PHYS_COUNT);
979             regname(ROCKER_PORT_PHYS_LINK_STATUS);
980             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
981             regname(ROCKER_PORT_PHYS_ENABLE);
982             regname(ROCKER_PORT_PHYS_ENABLE+4);
983             regname(ROCKER_SWITCH_ID);
984             regname(ROCKER_SWITCH_ID+4);
985         }
986     }
987     return "???";
988 }
989 #else
990 static const char *rocker_reg_name(void *opaque, hwaddr addr)
991 {
992     return NULL;
993 }
994 #endif
995 
996 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
997                               unsigned size)
998 {
999     DPRINTF("Write %s addr " TARGET_FMT_plx
1000             ", size %u, val " TARGET_FMT_plx "\n",
1001             rocker_reg_name(opaque, addr), addr, size, val);
1002 
1003     switch (size) {
1004     case 4:
1005         rocker_io_writel(opaque, addr, val);
1006         break;
1007     case 8:
1008         rocker_io_writeq(opaque, addr, val);
1009         break;
1010     }
1011 }
1012 
1013 static uint64_t rocker_port_phys_link_status(Rocker *r)
1014 {
1015     int i;
1016     uint64_t status = 0;
1017 
1018     for (i = 0; i < r->fp_ports; i++) {
1019         FpPort *port = r->fp_port[i];
1020 
1021         if (fp_port_get_link_up(port)) {
1022             status |= 1 << (i + 1);
1023         }
1024     }
1025     return status;
1026 }
1027 
1028 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1029 {
1030     int i;
1031     uint64_t ret = 0;
1032 
1033     for (i = 0; i < r->fp_ports; i++) {
1034         FpPort *port = r->fp_port[i];
1035 
1036         if (fp_port_enabled(port)) {
1037             ret |= 1 << (i + 1);
1038         }
1039     }
1040     return ret;
1041 }
1042 
1043 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1044 {
1045     Rocker *r = opaque;
1046     uint32_t ret;
1047 
1048     if (rocker_addr_is_desc_reg(r, addr)) {
1049         unsigned index = ROCKER_RING_INDEX(addr);
1050         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1051 
1052         switch (offset) {
1053         case ROCKER_DMA_DESC_ADDR_OFFSET:
1054             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1055             break;
1056         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1057             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1058             break;
1059         case ROCKER_DMA_DESC_SIZE_OFFSET:
1060             ret = desc_ring_get_size(r->rings[index]);
1061             break;
1062         case ROCKER_DMA_DESC_HEAD_OFFSET:
1063             ret = desc_ring_get_head(r->rings[index]);
1064             break;
1065         case ROCKER_DMA_DESC_TAIL_OFFSET:
1066             ret = desc_ring_get_tail(r->rings[index]);
1067             break;
1068         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1069             ret = desc_ring_get_credits(r->rings[index]);
1070             break;
1071         default:
1072             DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1073                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1074             ret = 0;
1075             break;
1076         }
1077         return ret;
1078     }
1079 
1080     switch (addr) {
1081     case ROCKER_BOGUS_REG0:
1082     case ROCKER_BOGUS_REG1:
1083     case ROCKER_BOGUS_REG2:
1084     case ROCKER_BOGUS_REG3:
1085         ret = 0xDEADBABE;
1086         break;
1087     case ROCKER_TEST_REG:
1088         ret = r->test_reg * 2;
1089         break;
1090     case ROCKER_TEST_REG64:
1091         ret = (uint32_t)(r->test_reg64 * 2);
1092         break;
1093     case ROCKER_TEST_REG64 + 4:
1094         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1095         break;
1096     case ROCKER_TEST_DMA_SIZE:
1097         ret = r->test_dma_size;
1098         break;
1099     case ROCKER_TEST_DMA_ADDR:
1100         ret = (uint32_t)r->test_dma_addr;
1101         break;
1102     case ROCKER_TEST_DMA_ADDR + 4:
1103         ret = (uint32_t)(r->test_dma_addr >> 32);
1104         break;
1105     case ROCKER_PORT_PHYS_COUNT:
1106         ret = r->fp_ports;
1107         break;
1108     case ROCKER_PORT_PHYS_LINK_STATUS:
1109         ret = (uint32_t)rocker_port_phys_link_status(r);
1110         break;
1111     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1112         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1113         break;
1114     case ROCKER_PORT_PHYS_ENABLE:
1115         ret = (uint32_t)rocker_port_phys_enable_read(r);
1116         break;
1117     case ROCKER_PORT_PHYS_ENABLE + 4:
1118         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1119         break;
1120     case ROCKER_SWITCH_ID:
1121         ret = (uint32_t)r->switch_id;
1122         break;
1123     case ROCKER_SWITCH_ID + 4:
1124         ret = (uint32_t)(r->switch_id >> 32);
1125         break;
1126     default:
1127         DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1128         ret = 0;
1129         break;
1130     }
1131     return ret;
1132 }
1133 
1134 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1135 {
1136     Rocker *r = opaque;
1137     uint64_t ret;
1138 
1139     if (rocker_addr_is_desc_reg(r, addr)) {
1140         unsigned index = ROCKER_RING_INDEX(addr);
1141         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1142 
1143         switch (addr & ROCKER_DMA_DESC_MASK) {
1144         case ROCKER_DMA_DESC_ADDR_OFFSET:
1145             ret = desc_ring_get_base_addr(r->rings[index]);
1146             break;
1147         default:
1148             DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1149                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1150             ret = 0;
1151             break;
1152         }
1153         return ret;
1154     }
1155 
1156     switch (addr) {
1157     case ROCKER_BOGUS_REG0:
1158     case ROCKER_BOGUS_REG2:
1159         ret = 0xDEADBABEDEADBABEULL;
1160         break;
1161     case ROCKER_TEST_REG64:
1162         ret = r->test_reg64 * 2;
1163         break;
1164     case ROCKER_TEST_DMA_ADDR:
1165         ret = r->test_dma_addr;
1166         break;
1167     case ROCKER_PORT_PHYS_LINK_STATUS:
1168         ret = rocker_port_phys_link_status(r);
1169         break;
1170     case ROCKER_PORT_PHYS_ENABLE:
1171         ret = rocker_port_phys_enable_read(r);
1172         break;
1173     case ROCKER_SWITCH_ID:
1174         ret = r->switch_id;
1175         break;
1176     default:
1177         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1178         ret = 0;
1179         break;
1180     }
1181     return ret;
1182 }
1183 
1184 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1185 {
1186     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1187             rocker_reg_name(opaque, addr), addr, size);
1188 
1189     switch (size) {
1190     case 4:
1191         return rocker_io_readl(opaque, addr);
1192     case 8:
1193         return rocker_io_readq(opaque, addr);
1194     }
1195 
1196     return -1;
1197 }
1198 
1199 static const MemoryRegionOps rocker_mmio_ops = {
1200     .read = rocker_mmio_read,
1201     .write = rocker_mmio_write,
1202     .endianness = DEVICE_LITTLE_ENDIAN,
1203     .valid = {
1204         .min_access_size = 4,
1205         .max_access_size = 8,
1206     },
1207     .impl = {
1208         .min_access_size = 4,
1209         .max_access_size = 8,
1210     },
1211 };
1212 
1213 static void rocker_msix_vectors_unuse(Rocker *r,
1214                                       unsigned int num_vectors)
1215 {
1216     PCIDevice *dev = PCI_DEVICE(r);
1217     int i;
1218 
1219     for (i = 0; i < num_vectors; i++) {
1220         msix_vector_unuse(dev, i);
1221     }
1222 }
1223 
1224 static int rocker_msix_vectors_use(Rocker *r,
1225                                    unsigned int num_vectors)
1226 {
1227     PCIDevice *dev = PCI_DEVICE(r);
1228     int err;
1229     int i;
1230 
1231     for (i = 0; i < num_vectors; i++) {
1232         err = msix_vector_use(dev, i);
1233         if (err) {
1234             goto rollback;
1235         }
1236     }
1237     return 0;
1238 
1239 rollback:
1240     rocker_msix_vectors_unuse(r, i);
1241     return err;
1242 }
1243 
1244 static int rocker_msix_init(Rocker *r)
1245 {
1246     PCIDevice *dev = PCI_DEVICE(r);
1247     int err;
1248 
1249     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1250                     &r->msix_bar,
1251                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1252                     &r->msix_bar,
1253                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1254                     0);
1255     if (err) {
1256         return err;
1257     }
1258 
1259     err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1260     if (err) {
1261         goto err_msix_vectors_use;
1262     }
1263 
1264     return 0;
1265 
1266 err_msix_vectors_use:
1267     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1268     return err;
1269 }
1270 
1271 static void rocker_msix_uninit(Rocker *r)
1272 {
1273     PCIDevice *dev = PCI_DEVICE(r);
1274 
1275     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1276     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1277 }
1278 
1279 static int pci_rocker_init(PCIDevice *dev)
1280 {
1281     Rocker *r = to_rocker(dev);
1282     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1283     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1284     static int sw_index;
1285     int i, err = 0;
1286 
1287     /* allocate worlds */
1288 
1289     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1290     r->world_dflt = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
1291 
1292     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1293         if (!r->worlds[i]) {
1294             goto err_world_alloc;
1295         }
1296     }
1297 
1298     /* set up memory-mapped region at BAR0 */
1299 
1300     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1301                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1302     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1303                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1304 
1305     /* set up memory-mapped region for MSI-X */
1306 
1307     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1308                        ROCKER_PCI_MSIX_BAR_SIZE);
1309     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1310                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1311 
1312     /* MSI-X init */
1313 
1314     err = rocker_msix_init(r);
1315     if (err) {
1316         goto err_msix_init;
1317     }
1318 
1319     /* validate switch properties */
1320 
1321     if (!r->name) {
1322         r->name = g_strdup(ROCKER);
1323     }
1324 
1325     if (rocker_find(r->name)) {
1326         err = -EEXIST;
1327         goto err_duplicate;
1328     }
1329 
1330     /* Rocker name is passed in port name requests to OS with the intention
1331      * that the name is used in interface names. Limit the length of the
1332      * rocker name to avoid naming problems in the OS. Also, adding the
1333      * port number as p# and unganged breakout b#, where # is at most 2
1334      * digits, so leave room for it too (-1 for string terminator, -3 for
1335      * p# and -3 for b#)
1336      */
1337 #define ROCKER_IFNAMSIZ 16
1338 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1339     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1340         fprintf(stderr,
1341                 "rocker: name too long; please shorten to at most %d chars\n",
1342                 MAX_ROCKER_NAME_LEN);
1343         return -EINVAL;
1344     }
1345 
1346     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1347         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1348         r->fp_start_macaddr.a[4] += (sw_index++);
1349     }
1350 
1351     if (!r->switch_id) {
1352         memcpy(&r->switch_id, &r->fp_start_macaddr,
1353                sizeof(r->fp_start_macaddr));
1354     }
1355 
1356     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1357         r->fp_ports = ROCKER_FP_PORTS_MAX;
1358     }
1359 
1360     r->rings = g_malloc(sizeof(DescRing *) * rocker_pci_ring_count(r));
1361     if (!r->rings) {
1362         goto err_rings_alloc;
1363     }
1364 
1365     /* Rings are ordered like this:
1366      * - command ring
1367      * - event ring
1368      * - port0 tx ring
1369      * - port0 rx ring
1370      * - port1 tx ring
1371      * - port1 rx ring
1372      * .....
1373      */
1374 
1375     err = -ENOMEM;
1376     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1377         DescRing *ring = desc_ring_alloc(r, i);
1378 
1379         if (!ring) {
1380             goto err_ring_alloc;
1381         }
1382 
1383         if (i == ROCKER_RING_CMD) {
1384             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1385         } else if (i == ROCKER_RING_EVENT) {
1386             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1387         } else if (i % 2 == 0) {
1388             desc_ring_set_consume(ring, tx_consume,
1389                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1390         } else if (i % 2 == 1) {
1391             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1392         }
1393 
1394         r->rings[i] = ring;
1395     }
1396 
1397     for (i = 0; i < r->fp_ports; i++) {
1398         FpPort *port =
1399             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1400                           i, &r->fp_ports_peers[i]);
1401 
1402         if (!port) {
1403             goto err_port_alloc;
1404         }
1405 
1406         r->fp_port[i] = port;
1407         fp_port_set_world(port, r->world_dflt);
1408     }
1409 
1410     QLIST_INSERT_HEAD(&rockers, r, next);
1411 
1412     return 0;
1413 
1414 err_port_alloc:
1415     for (--i; i >= 0; i--) {
1416         FpPort *port = r->fp_port[i];
1417         fp_port_free(port);
1418     }
1419     i = rocker_pci_ring_count(r);
1420 err_ring_alloc:
1421     for (--i; i >= 0; i--) {
1422         desc_ring_free(r->rings[i]);
1423     }
1424     g_free(r->rings);
1425 err_rings_alloc:
1426 err_duplicate:
1427     rocker_msix_uninit(r);
1428 err_msix_init:
1429     object_unparent(OBJECT(&r->msix_bar));
1430     object_unparent(OBJECT(&r->mmio));
1431 err_world_alloc:
1432     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1433         if (r->worlds[i]) {
1434             world_free(r->worlds[i]);
1435         }
1436     }
1437     return err;
1438 }
1439 
1440 static void pci_rocker_uninit(PCIDevice *dev)
1441 {
1442     Rocker *r = to_rocker(dev);
1443     int i;
1444 
1445     QLIST_REMOVE(r, next);
1446 
1447     for (i = 0; i < r->fp_ports; i++) {
1448         FpPort *port = r->fp_port[i];
1449 
1450         fp_port_free(port);
1451         r->fp_port[i] = NULL;
1452     }
1453 
1454     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1455         if (r->rings[i]) {
1456             desc_ring_free(r->rings[i]);
1457         }
1458     }
1459     g_free(r->rings);
1460 
1461     rocker_msix_uninit(r);
1462     object_unparent(OBJECT(&r->msix_bar));
1463     object_unparent(OBJECT(&r->mmio));
1464 
1465     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1466         if (r->worlds[i]) {
1467             world_free(r->worlds[i]);
1468         }
1469     }
1470     g_free(r->fp_ports_peers);
1471 }
1472 
1473 static void rocker_reset(DeviceState *dev)
1474 {
1475     Rocker *r = to_rocker(dev);
1476     int i;
1477 
1478     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1479         if (r->worlds[i]) {
1480             world_reset(r->worlds[i]);
1481         }
1482     }
1483     for (i = 0; i < r->fp_ports; i++) {
1484         fp_port_reset(r->fp_port[i]);
1485         fp_port_set_world(r->fp_port[i], r->world_dflt);
1486     }
1487 
1488     r->test_reg = 0;
1489     r->test_reg64 = 0;
1490     r->test_dma_addr = 0;
1491     r->test_dma_size = 0;
1492 
1493     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1494         desc_ring_reset(r->rings[i]);
1495     }
1496 
1497     DPRINTF("Reset done\n");
1498 }
1499 
1500 static Property rocker_properties[] = {
1501     DEFINE_PROP_STRING("name", Rocker, name),
1502     DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1503                         fp_start_macaddr),
1504     DEFINE_PROP_UINT64("switch_id", Rocker,
1505                        switch_id, 0),
1506     DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1507                       fp_ports_peers, qdev_prop_netdev, NICPeers),
1508     DEFINE_PROP_END_OF_LIST(),
1509 };
1510 
1511 static const VMStateDescription rocker_vmsd = {
1512     .name = ROCKER,
1513     .unmigratable = 1,
1514 };
1515 
1516 static void rocker_class_init(ObjectClass *klass, void *data)
1517 {
1518     DeviceClass *dc = DEVICE_CLASS(klass);
1519     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1520 
1521     k->init = pci_rocker_init;
1522     k->exit = pci_rocker_uninit;
1523     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1524     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1525     k->revision = ROCKER_PCI_REVISION;
1526     k->class_id = PCI_CLASS_NETWORK_OTHER;
1527     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1528     dc->desc = "Rocker Switch";
1529     dc->reset = rocker_reset;
1530     dc->props = rocker_properties;
1531     dc->vmsd = &rocker_vmsd;
1532 }
1533 
1534 static const TypeInfo rocker_info = {
1535     .name          = ROCKER,
1536     .parent        = TYPE_PCI_DEVICE,
1537     .instance_size = sizeof(Rocker),
1538     .class_init    = rocker_class_init,
1539 };
1540 
1541 static void rocker_register_types(void)
1542 {
1543     type_register_static(&rocker_info);
1544 }
1545 
1546 type_init(rocker_register_types)
1547