xref: /qemu/hw/net/rocker/rocker.c (revision f917eed3)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/pci/pci.h"
20 #include "hw/qdev-properties.h"
21 #include "migration/vmstate.h"
22 #include "hw/pci/msix.h"
23 #include "net/net.h"
24 #include "net/eth.h"
25 #include "qapi/error.h"
26 #include "qapi/qapi-commands-rocker.h"
27 #include "qemu/iov.h"
28 #include "qemu/module.h"
29 #include "qemu/bitops.h"
30 #include "qemu/log.h"
31 
32 #include "rocker.h"
33 #include "rocker_hw.h"
34 #include "rocker_fp.h"
35 #include "rocker_desc.h"
36 #include "rocker_tlv.h"
37 #include "rocker_world.h"
38 #include "rocker_of_dpa.h"
39 
40 struct rocker {
41     /* private */
42     PCIDevice parent_obj;
43     /* public */
44 
45     MemoryRegion mmio;
46     MemoryRegion msix_bar;
47 
48     /* switch configuration */
49     char *name;                  /* switch name */
50     char *world_name;            /* world name */
51     uint32_t fp_ports;           /* front-panel port count */
52     NICPeers *fp_ports_peers;
53     MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
54     uint64_t switch_id;          /* switch id */
55 
56     /* front-panel ports */
57     FpPort *fp_port[ROCKER_FP_PORTS_MAX];
58 
59     /* register backings */
60     uint32_t test_reg;
61     uint64_t test_reg64;
62     dma_addr_t test_dma_addr;
63     uint32_t test_dma_size;
64     uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
65 
66     /* desc rings */
67     DescRing **rings;
68 
69     /* switch worlds */
70     World *worlds[ROCKER_WORLD_TYPE_MAX];
71     World *world_dflt;
72 
73     QLIST_ENTRY(rocker) next;
74 };
75 
76 static QLIST_HEAD(, rocker) rockers;
77 
78 Rocker *rocker_find(const char *name)
79 {
80     Rocker *r;
81 
82     QLIST_FOREACH(r, &rockers, next)
83         if (strcmp(r->name, name) == 0) {
84             return r;
85         }
86 
87     return NULL;
88 }
89 
90 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
91 {
92     if (type < ROCKER_WORLD_TYPE_MAX) {
93         return r->worlds[type];
94     }
95     return NULL;
96 }
97 
98 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
99 {
100     RockerSwitch *rocker;
101     Rocker *r;
102 
103     r = rocker_find(name);
104     if (!r) {
105         error_setg(errp, "rocker %s not found", name);
106         return NULL;
107     }
108 
109     rocker = g_new0(RockerSwitch, 1);
110     rocker->name = g_strdup(r->name);
111     rocker->id = r->switch_id;
112     rocker->ports = r->fp_ports;
113 
114     return rocker;
115 }
116 
117 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
118 {
119     RockerPortList *list = NULL;
120     Rocker *r;
121     int i;
122 
123     r = rocker_find(name);
124     if (!r) {
125         error_setg(errp, "rocker %s not found", name);
126         return NULL;
127     }
128 
129     for (i = r->fp_ports - 1; i >= 0; i--) {
130         QAPI_LIST_PREPEND(list, fp_port_get_info(r->fp_port[i]));
131     }
132 
133     return list;
134 }
135 
136 uint32_t rocker_fp_ports(Rocker *r)
137 {
138     return r->fp_ports;
139 }
140 
141 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
142                                             DescRing *ring)
143 {
144     return (desc_ring_index(ring) - 2) / 2 + 1;
145 }
146 
147 static int tx_consume(Rocker *r, DescInfo *info)
148 {
149     PCIDevice *dev = PCI_DEVICE(r);
150     char *buf = desc_get_buf(info, true);
151     RockerTlv *tlv_frag;
152     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
153     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
154     uint32_t pport;
155     uint32_t port;
156     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
157     uint16_t tx_l3_csum_off = 0;
158     uint16_t tx_tso_mss = 0;
159     uint16_t tx_tso_hdr_len = 0;
160     int iovcnt = 0;
161     int err = ROCKER_OK;
162     int rem;
163     int i;
164 
165     if (!buf) {
166         return -ROCKER_ENXIO;
167     }
168 
169     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
170 
171     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
172         return -ROCKER_EINVAL;
173     }
174 
175     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
176     if (!fp_port_from_pport(pport, &port)) {
177         return -ROCKER_EINVAL;
178     }
179 
180     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
181         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
182     }
183 
184     switch (tx_offload) {
185     case ROCKER_TX_OFFLOAD_L3_CSUM:
186         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
187             return -ROCKER_EINVAL;
188         }
189         break;
190     case ROCKER_TX_OFFLOAD_TSO:
191         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
192             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
193             return -ROCKER_EINVAL;
194         }
195         break;
196     }
197 
198     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
199         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
200         qemu_log_mask(LOG_UNIMP, "rocker %s: L3 not implemented"
201                                  " (cksum off: %u)\n",
202                       __func__, tx_l3_csum_off);
203     }
204 
205     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
206         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
207         qemu_log_mask(LOG_UNIMP, "rocker %s: TSO not implemented (MSS: %u)\n",
208                       __func__, tx_tso_mss);
209     }
210 
211     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
212         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
213         qemu_log_mask(LOG_UNIMP, "rocker %s: TSO not implemented"
214                                  " (hdr length: %u)\n",
215                       __func__, tx_tso_hdr_len);
216     }
217 
218     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
219         hwaddr frag_addr;
220         uint16_t frag_len;
221 
222         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
223             err = -ROCKER_EINVAL;
224             goto err_bad_attr;
225         }
226 
227         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
228 
229         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
230             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
231             err = -ROCKER_EINVAL;
232             goto err_bad_attr;
233         }
234 
235         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
236         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
237 
238         if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
239             goto err_too_many_frags;
240         }
241         iov[iovcnt].iov_len = frag_len;
242         iov[iovcnt].iov_base = g_malloc(frag_len);
243 
244         pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
245                      iov[iovcnt].iov_len);
246 
247         iovcnt++;
248     }
249 
250     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
251 
252 err_too_many_frags:
253 err_bad_attr:
254     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
255         g_free(iov[i].iov_base);
256     }
257 
258     return err;
259 }
260 
261 static int cmd_get_port_settings(Rocker *r,
262                                  DescInfo *info, char *buf,
263                                  RockerTlv *cmd_info_tlv)
264 {
265     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
266     RockerTlv *nest;
267     FpPort *fp_port;
268     uint32_t pport;
269     uint32_t port;
270     uint32_t speed;
271     uint8_t duplex;
272     uint8_t autoneg;
273     uint8_t learning;
274     char *phys_name;
275     MACAddr macaddr;
276     enum rocker_world_type mode;
277     size_t tlv_size;
278     int pos;
279     int err;
280 
281     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
282                             cmd_info_tlv);
283 
284     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
285         return -ROCKER_EINVAL;
286     }
287 
288     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
289     if (!fp_port_from_pport(pport, &port)) {
290         return -ROCKER_EINVAL;
291     }
292     fp_port = r->fp_port[port];
293 
294     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
295     if (err) {
296         return err;
297     }
298 
299     fp_port_get_macaddr(fp_port, &macaddr);
300     mode = world_type(fp_port_get_world(fp_port));
301     learning = fp_port_get_learning(fp_port);
302     phys_name = fp_port_get_name(fp_port);
303 
304     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
305                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
306                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
307                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
308                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
309                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
310                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
311                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
312                rocker_tlv_total_size(strlen(phys_name));
313 
314     if (tlv_size > desc_buf_size(info)) {
315         return -ROCKER_EMSGSIZE;
316     }
317 
318     pos = 0;
319     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
320     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
321     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
322     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
323     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
324     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
325                    sizeof(macaddr.a), macaddr.a);
326     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
327     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
328                       learning);
329     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
330                    strlen(phys_name), phys_name);
331     rocker_tlv_nest_end(buf, &pos, nest);
332 
333     return desc_set_buf(info, tlv_size);
334 }
335 
336 static int cmd_set_port_settings(Rocker *r,
337                                  RockerTlv *cmd_info_tlv)
338 {
339     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
340     FpPort *fp_port;
341     uint32_t pport;
342     uint32_t port;
343     uint32_t speed;
344     uint8_t duplex;
345     uint8_t autoneg;
346     uint8_t learning;
347     MACAddr macaddr;
348     enum rocker_world_type mode;
349     int err;
350 
351     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
352                             cmd_info_tlv);
353 
354     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
355         return -ROCKER_EINVAL;
356     }
357 
358     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
359     if (!fp_port_from_pport(pport, &port)) {
360         return -ROCKER_EINVAL;
361     }
362     fp_port = r->fp_port[port];
363 
364     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
365         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
366         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
367 
368         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
369         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
370         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
371 
372         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
373         if (err) {
374             return err;
375         }
376     }
377 
378     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
379         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
380             sizeof(macaddr.a)) {
381             return -ROCKER_EINVAL;
382         }
383         memcpy(macaddr.a,
384                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
385                sizeof(macaddr.a));
386         fp_port_set_macaddr(fp_port, &macaddr);
387     }
388 
389     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
390         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
391         if (mode >= ROCKER_WORLD_TYPE_MAX) {
392             return -ROCKER_EINVAL;
393         }
394         /* We don't support world change. */
395         if (!fp_port_check_world(fp_port, r->worlds[mode])) {
396             return -ROCKER_EINVAL;
397         }
398     }
399 
400     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
401         learning =
402             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
403         fp_port_set_learning(fp_port, learning);
404     }
405 
406     return ROCKER_OK;
407 }
408 
409 static int cmd_consume(Rocker *r, DescInfo *info)
410 {
411     char *buf = desc_get_buf(info, false);
412     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
413     RockerTlv *info_tlv;
414     World *world;
415     uint16_t cmd;
416     int err;
417 
418     if (!buf) {
419         return -ROCKER_ENXIO;
420     }
421 
422     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
423 
424     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
425         return -ROCKER_EINVAL;
426     }
427 
428     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
429     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
430 
431     /* This might be reworked to something like this:
432      * Every world will have an array of command handlers from
433      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
434      * up to each world to implement whatever command it want.
435      * It can reference "generic" commands as cmd_set_port_settings or
436      * cmd_get_port_settings
437      */
438 
439     switch (cmd) {
440     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
441     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
442     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
443     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
444     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
445     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
446     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
447     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
448         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
449         err = world_do_cmd(world, info, buf, cmd, info_tlv);
450         break;
451     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
452         err = cmd_get_port_settings(r, info, buf, info_tlv);
453         break;
454     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
455         err = cmd_set_port_settings(r, info_tlv);
456         break;
457     default:
458         err = -ROCKER_EINVAL;
459         break;
460     }
461 
462     return err;
463 }
464 
465 static void rocker_msix_irq(Rocker *r, unsigned vector)
466 {
467     PCIDevice *dev = PCI_DEVICE(r);
468 
469     DPRINTF("MSI-X notify request for vector %d\n", vector);
470     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
471         DPRINTF("incorrect vector %d\n", vector);
472         return;
473     }
474     msix_notify(dev, vector);
475 }
476 
477 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
478 {
479     DescRing *ring = r->rings[ROCKER_RING_EVENT];
480     DescInfo *info = desc_ring_fetch_desc(ring);
481     RockerTlv *nest;
482     char *buf;
483     size_t tlv_size;
484     int pos;
485     int err;
486 
487     if (!info) {
488         return -ROCKER_ENOBUFS;
489     }
490 
491     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
492                rocker_tlv_total_size(0) +                 /* nest */
493                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
494                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
495 
496     if (tlv_size > desc_buf_size(info)) {
497         err = -ROCKER_EMSGSIZE;
498         goto err_too_big;
499     }
500 
501     buf = desc_get_buf(info, false);
502     if (!buf) {
503         err = -ROCKER_ENOMEM;
504         goto err_no_mem;
505     }
506 
507     pos = 0;
508     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
509                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
510     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
511     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
512     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
513                       link_up ? 1 : 0);
514     rocker_tlv_nest_end(buf, &pos, nest);
515 
516     err = desc_set_buf(info, tlv_size);
517 
518 err_too_big:
519 err_no_mem:
520     if (desc_ring_post_desc(ring, err)) {
521         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
522     }
523 
524     return err;
525 }
526 
527 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
528                                uint16_t vlan_id)
529 {
530     DescRing *ring = r->rings[ROCKER_RING_EVENT];
531     DescInfo *info;
532     FpPort *fp_port;
533     uint32_t port;
534     RockerTlv *nest;
535     char *buf;
536     size_t tlv_size;
537     int pos;
538     int err;
539 
540     if (!fp_port_from_pport(pport, &port)) {
541         return -ROCKER_EINVAL;
542     }
543     fp_port = r->fp_port[port];
544     if (!fp_port_get_learning(fp_port)) {
545         return ROCKER_OK;
546     }
547 
548     info = desc_ring_fetch_desc(ring);
549     if (!info) {
550         return -ROCKER_ENOBUFS;
551     }
552 
553     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
554                rocker_tlv_total_size(0) +                 /* nest */
555                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
556                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
557                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
558 
559     if (tlv_size > desc_buf_size(info)) {
560         err = -ROCKER_EMSGSIZE;
561         goto err_too_big;
562     }
563 
564     buf = desc_get_buf(info, false);
565     if (!buf) {
566         err = -ROCKER_ENOMEM;
567         goto err_no_mem;
568     }
569 
570     pos = 0;
571     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
572                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
573     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
574     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
575     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
576     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
577     rocker_tlv_nest_end(buf, &pos, nest);
578 
579     err = desc_set_buf(info, tlv_size);
580 
581 err_too_big:
582 err_no_mem:
583     if (desc_ring_post_desc(ring, err)) {
584         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
585     }
586 
587     return err;
588 }
589 
590 static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
591                                                      uint32_t pport)
592 {
593     return r->rings[(pport - 1) * 2 + 3];
594 }
595 
596 int rx_produce(World *world, uint32_t pport,
597                const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
598 {
599     Rocker *r = world_rocker(world);
600     PCIDevice *dev = (PCIDevice *)r;
601     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
602     DescInfo *info = desc_ring_fetch_desc(ring);
603     char *data;
604     size_t data_size = iov_size(iov, iovcnt);
605     char *buf;
606     uint16_t rx_flags = 0;
607     uint16_t rx_csum = 0;
608     size_t tlv_size;
609     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
610     hwaddr frag_addr;
611     uint16_t frag_max_len;
612     int pos;
613     int err;
614 
615     if (!info) {
616         return -ROCKER_ENOBUFS;
617     }
618 
619     buf = desc_get_buf(info, false);
620     if (!buf) {
621         err = -ROCKER_ENXIO;
622         goto out;
623     }
624     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
625 
626     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
627         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
628         err = -ROCKER_EINVAL;
629         goto out;
630     }
631 
632     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
633     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
634 
635     if (data_size > frag_max_len) {
636         err = -ROCKER_EMSGSIZE;
637         goto out;
638     }
639 
640     if (copy_to_cpu) {
641         rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
642     }
643 
644     /* XXX calc rx flags/csum */
645 
646     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
647                rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
648                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
649                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
650                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
651 
652     if (tlv_size > desc_buf_size(info)) {
653         err = -ROCKER_EMSGSIZE;
654         goto out;
655     }
656 
657     /* TODO:
658      * iov dma write can be optimized in similar way e1000 does it in
659      * e1000_receive_iov. But maybe if would make sense to introduce
660      * generic helper iov_dma_write.
661      */
662 
663     data = g_malloc(data_size);
664 
665     iov_to_buf(iov, iovcnt, 0, data, data_size);
666     pci_dma_write(dev, frag_addr, data, data_size);
667     g_free(data);
668 
669     pos = 0;
670     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
671     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
672     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
673     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
674     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
675 
676     err = desc_set_buf(info, tlv_size);
677 
678 out:
679     if (desc_ring_post_desc(ring, err)) {
680         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
681     }
682 
683     return err;
684 }
685 
686 int rocker_port_eg(Rocker *r, uint32_t pport,
687                    const struct iovec *iov, int iovcnt)
688 {
689     FpPort *fp_port;
690     uint32_t port;
691 
692     if (!fp_port_from_pport(pport, &port)) {
693         return -ROCKER_EINVAL;
694     }
695 
696     fp_port = r->fp_port[port];
697 
698     return fp_port_eg(fp_port, iov, iovcnt);
699 }
700 
701 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
702 {
703     PCIDevice *dev = PCI_DEVICE(r);
704     char *buf;
705     int i;
706 
707     buf = g_malloc(r->test_dma_size);
708 
709     switch (val) {
710     case ROCKER_TEST_DMA_CTRL_CLEAR:
711         memset(buf, 0, r->test_dma_size);
712         break;
713     case ROCKER_TEST_DMA_CTRL_FILL:
714         memset(buf, 0x96, r->test_dma_size);
715         break;
716     case ROCKER_TEST_DMA_CTRL_INVERT:
717         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
718         for (i = 0; i < r->test_dma_size; i++) {
719             buf[i] = ~buf[i];
720         }
721         break;
722     default:
723         DPRINTF("not test dma control val=0x%08x\n", val);
724         goto err_out;
725     }
726     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
727 
728     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
729 
730 err_out:
731     g_free(buf);
732 }
733 
734 static void rocker_reset(DeviceState *dev);
735 
736 static void rocker_control(Rocker *r, uint32_t val)
737 {
738     if (val & ROCKER_CONTROL_RESET) {
739         rocker_reset(DEVICE(r));
740     }
741 }
742 
743 static int rocker_pci_ring_count(Rocker *r)
744 {
745     /* There are:
746      * - command ring
747      * - event ring
748      * - tx and rx ring per each port
749      */
750     return 2 + (2 * r->fp_ports);
751 }
752 
753 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
754 {
755     hwaddr start = ROCKER_DMA_DESC_BASE;
756     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
757 
758     return addr >= start && addr < end;
759 }
760 
761 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
762 {
763     int i;
764     bool old_enabled;
765     bool new_enabled;
766     FpPort *fp_port;
767 
768     for (i = 0; i < r->fp_ports; i++) {
769         fp_port = r->fp_port[i];
770         old_enabled = fp_port_enabled(fp_port);
771         new_enabled = (new >> (i + 1)) & 0x1;
772         if (new_enabled == old_enabled) {
773             continue;
774         }
775         if (new_enabled) {
776             fp_port_enable(r->fp_port[i]);
777         } else {
778             fp_port_disable(r->fp_port[i]);
779         }
780     }
781 }
782 
783 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
784 {
785     Rocker *r = opaque;
786 
787     if (rocker_addr_is_desc_reg(r, addr)) {
788         unsigned index = ROCKER_RING_INDEX(addr);
789         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
790 
791         switch (offset) {
792         case ROCKER_DMA_DESC_ADDR_OFFSET:
793             r->lower32 = (uint64_t)val;
794             break;
795         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
796             desc_ring_set_base_addr(r->rings[index],
797                                     ((uint64_t)val) << 32 | r->lower32);
798             r->lower32 = 0;
799             break;
800         case ROCKER_DMA_DESC_SIZE_OFFSET:
801             desc_ring_set_size(r->rings[index], val);
802             break;
803         case ROCKER_DMA_DESC_HEAD_OFFSET:
804             if (desc_ring_set_head(r->rings[index], val)) {
805                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
806             }
807             break;
808         case ROCKER_DMA_DESC_CTRL_OFFSET:
809             desc_ring_set_ctrl(r->rings[index], val);
810             break;
811         case ROCKER_DMA_DESC_CREDITS_OFFSET:
812             if (desc_ring_ret_credits(r->rings[index], val)) {
813                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
814             }
815             break;
816         default:
817             DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
818                     " val=0x%08x (ring %d, addr=0x%02x)\n",
819                     addr, val, index, offset);
820             break;
821         }
822         return;
823     }
824 
825     switch (addr) {
826     case ROCKER_TEST_REG:
827         r->test_reg = val;
828         break;
829     case ROCKER_TEST_REG64:
830     case ROCKER_TEST_DMA_ADDR:
831     case ROCKER_PORT_PHYS_ENABLE:
832         r->lower32 = (uint64_t)val;
833         break;
834     case ROCKER_TEST_REG64 + 4:
835         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
836         r->lower32 = 0;
837         break;
838     case ROCKER_TEST_IRQ:
839         rocker_msix_irq(r, val);
840         break;
841     case ROCKER_TEST_DMA_SIZE:
842         r->test_dma_size = val & 0xFFFF;
843         break;
844     case ROCKER_TEST_DMA_ADDR + 4:
845         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
846         r->lower32 = 0;
847         break;
848     case ROCKER_TEST_DMA_CTRL:
849         rocker_test_dma_ctrl(r, val);
850         break;
851     case ROCKER_CONTROL:
852         rocker_control(r, val);
853         break;
854     case ROCKER_PORT_PHYS_ENABLE + 4:
855         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
856         r->lower32 = 0;
857         break;
858     default:
859         DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
860                 " val=0x%08x\n", addr, val);
861         break;
862     }
863 }
864 
865 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
866 {
867     Rocker *r = opaque;
868 
869     if (rocker_addr_is_desc_reg(r, addr)) {
870         unsigned index = ROCKER_RING_INDEX(addr);
871         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
872 
873         switch (offset) {
874         case ROCKER_DMA_DESC_ADDR_OFFSET:
875             desc_ring_set_base_addr(r->rings[index], val);
876             break;
877         default:
878             DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
879                     " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
880                     addr, val, index, offset);
881             break;
882         }
883         return;
884     }
885 
886     switch (addr) {
887     case ROCKER_TEST_REG64:
888         r->test_reg64 = val;
889         break;
890     case ROCKER_TEST_DMA_ADDR:
891         r->test_dma_addr = val;
892         break;
893     case ROCKER_PORT_PHYS_ENABLE:
894         rocker_port_phys_enable_write(r, val);
895         break;
896     default:
897         DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
898                 " val=0x" TARGET_FMT_plx "\n", addr, val);
899         break;
900     }
901 }
902 
903 #ifdef DEBUG_ROCKER
904 #define regname(reg) case (reg): return #reg
905 static const char *rocker_reg_name(void *opaque, hwaddr addr)
906 {
907     Rocker *r = opaque;
908 
909     if (rocker_addr_is_desc_reg(r, addr)) {
910         unsigned index = ROCKER_RING_INDEX(addr);
911         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
912         static char buf[100];
913         char ring_name[10];
914 
915         switch (index) {
916         case 0:
917             sprintf(ring_name, "cmd");
918             break;
919         case 1:
920             sprintf(ring_name, "event");
921             break;
922         default:
923             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
924                     (index - 2) / 2);
925         }
926 
927         switch (offset) {
928         case ROCKER_DMA_DESC_ADDR_OFFSET:
929             sprintf(buf, "Ring[%s] ADDR", ring_name);
930             return buf;
931         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
932             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
933             return buf;
934         case ROCKER_DMA_DESC_SIZE_OFFSET:
935             sprintf(buf, "Ring[%s] SIZE", ring_name);
936             return buf;
937         case ROCKER_DMA_DESC_HEAD_OFFSET:
938             sprintf(buf, "Ring[%s] HEAD", ring_name);
939             return buf;
940         case ROCKER_DMA_DESC_TAIL_OFFSET:
941             sprintf(buf, "Ring[%s] TAIL", ring_name);
942             return buf;
943         case ROCKER_DMA_DESC_CTRL_OFFSET:
944             sprintf(buf, "Ring[%s] CTRL", ring_name);
945             return buf;
946         case ROCKER_DMA_DESC_CREDITS_OFFSET:
947             sprintf(buf, "Ring[%s] CREDITS", ring_name);
948             return buf;
949         default:
950             sprintf(buf, "Ring[%s] ???", ring_name);
951             return buf;
952         }
953     } else {
954         switch (addr) {
955             regname(ROCKER_BOGUS_REG0);
956             regname(ROCKER_BOGUS_REG1);
957             regname(ROCKER_BOGUS_REG2);
958             regname(ROCKER_BOGUS_REG3);
959             regname(ROCKER_TEST_REG);
960             regname(ROCKER_TEST_REG64);
961             regname(ROCKER_TEST_REG64+4);
962             regname(ROCKER_TEST_IRQ);
963             regname(ROCKER_TEST_DMA_ADDR);
964             regname(ROCKER_TEST_DMA_ADDR+4);
965             regname(ROCKER_TEST_DMA_SIZE);
966             regname(ROCKER_TEST_DMA_CTRL);
967             regname(ROCKER_CONTROL);
968             regname(ROCKER_PORT_PHYS_COUNT);
969             regname(ROCKER_PORT_PHYS_LINK_STATUS);
970             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
971             regname(ROCKER_PORT_PHYS_ENABLE);
972             regname(ROCKER_PORT_PHYS_ENABLE+4);
973             regname(ROCKER_SWITCH_ID);
974             regname(ROCKER_SWITCH_ID+4);
975         }
976     }
977     return "???";
978 }
979 #else
/*
 * Fallback variant of rocker_reg_name(), compiled when the preceding
 * preprocessor condition is false: no register-name lookup is performed
 * and NULL is returned for every address.
 *
 * @opaque: unused
 * @addr:   unused
 */
static const char *rocker_reg_name(void *opaque, hwaddr addr)
{
    return NULL;
}
984 #endif
985 
986 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
987                               unsigned size)
988 {
989     DPRINTF("Write %s addr " TARGET_FMT_plx
990             ", size %u, val " TARGET_FMT_plx "\n",
991             rocker_reg_name(opaque, addr), addr, size, val);
992 
993     switch (size) {
994     case 4:
995         rocker_io_writel(opaque, addr, val);
996         break;
997     case 8:
998         rocker_io_writeq(opaque, addr, val);
999         break;
1000     }
1001 }
1002 
1003 static uint64_t rocker_port_phys_link_status(Rocker *r)
1004 {
1005     int i;
1006     uint64_t status = 0;
1007 
1008     for (i = 0; i < r->fp_ports; i++) {
1009         FpPort *port = r->fp_port[i];
1010 
1011         if (fp_port_get_link_up(port)) {
1012             status |= 1 << (i + 1);
1013         }
1014     }
1015     return status;
1016 }
1017 
1018 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1019 {
1020     int i;
1021     uint64_t ret = 0;
1022 
1023     for (i = 0; i < r->fp_ports; i++) {
1024         FpPort *port = r->fp_port[i];
1025 
1026         if (fp_port_enabled(port)) {
1027             ret |= 1 << (i + 1);
1028         }
1029     }
1030     return ret;
1031 }
1032 
1033 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1034 {
1035     Rocker *r = opaque;
1036     uint32_t ret;
1037 
1038     if (rocker_addr_is_desc_reg(r, addr)) {
1039         unsigned index = ROCKER_RING_INDEX(addr);
1040         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1041 
1042         switch (offset) {
1043         case ROCKER_DMA_DESC_ADDR_OFFSET:
1044             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1045             break;
1046         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1047             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1048             break;
1049         case ROCKER_DMA_DESC_SIZE_OFFSET:
1050             ret = desc_ring_get_size(r->rings[index]);
1051             break;
1052         case ROCKER_DMA_DESC_HEAD_OFFSET:
1053             ret = desc_ring_get_head(r->rings[index]);
1054             break;
1055         case ROCKER_DMA_DESC_TAIL_OFFSET:
1056             ret = desc_ring_get_tail(r->rings[index]);
1057             break;
1058         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1059             ret = desc_ring_get_credits(r->rings[index]);
1060             break;
1061         default:
1062             DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1063                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1064             ret = 0;
1065             break;
1066         }
1067         return ret;
1068     }
1069 
1070     switch (addr) {
1071     case ROCKER_BOGUS_REG0:
1072     case ROCKER_BOGUS_REG1:
1073     case ROCKER_BOGUS_REG2:
1074     case ROCKER_BOGUS_REG3:
1075         ret = 0xDEADBABE;
1076         break;
1077     case ROCKER_TEST_REG:
1078         ret = r->test_reg * 2;
1079         break;
1080     case ROCKER_TEST_REG64:
1081         ret = (uint32_t)(r->test_reg64 * 2);
1082         break;
1083     case ROCKER_TEST_REG64 + 4:
1084         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1085         break;
1086     case ROCKER_TEST_DMA_SIZE:
1087         ret = r->test_dma_size;
1088         break;
1089     case ROCKER_TEST_DMA_ADDR:
1090         ret = (uint32_t)r->test_dma_addr;
1091         break;
1092     case ROCKER_TEST_DMA_ADDR + 4:
1093         ret = (uint32_t)(r->test_dma_addr >> 32);
1094         break;
1095     case ROCKER_PORT_PHYS_COUNT:
1096         ret = r->fp_ports;
1097         break;
1098     case ROCKER_PORT_PHYS_LINK_STATUS:
1099         ret = (uint32_t)rocker_port_phys_link_status(r);
1100         break;
1101     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1102         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1103         break;
1104     case ROCKER_PORT_PHYS_ENABLE:
1105         ret = (uint32_t)rocker_port_phys_enable_read(r);
1106         break;
1107     case ROCKER_PORT_PHYS_ENABLE + 4:
1108         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1109         break;
1110     case ROCKER_SWITCH_ID:
1111         ret = (uint32_t)r->switch_id;
1112         break;
1113     case ROCKER_SWITCH_ID + 4:
1114         ret = (uint32_t)(r->switch_id >> 32);
1115         break;
1116     default:
1117         DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1118         ret = 0;
1119         break;
1120     }
1121     return ret;
1122 }
1123 
1124 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1125 {
1126     Rocker *r = opaque;
1127     uint64_t ret;
1128 
1129     if (rocker_addr_is_desc_reg(r, addr)) {
1130         unsigned index = ROCKER_RING_INDEX(addr);
1131         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1132 
1133         switch (addr & ROCKER_DMA_DESC_MASK) {
1134         case ROCKER_DMA_DESC_ADDR_OFFSET:
1135             ret = desc_ring_get_base_addr(r->rings[index]);
1136             break;
1137         default:
1138             DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1139                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1140             ret = 0;
1141             break;
1142         }
1143         return ret;
1144     }
1145 
1146     switch (addr) {
1147     case ROCKER_BOGUS_REG0:
1148     case ROCKER_BOGUS_REG2:
1149         ret = 0xDEADBABEDEADBABEULL;
1150         break;
1151     case ROCKER_TEST_REG64:
1152         ret = r->test_reg64 * 2;
1153         break;
1154     case ROCKER_TEST_DMA_ADDR:
1155         ret = r->test_dma_addr;
1156         break;
1157     case ROCKER_PORT_PHYS_LINK_STATUS:
1158         ret = rocker_port_phys_link_status(r);
1159         break;
1160     case ROCKER_PORT_PHYS_ENABLE:
1161         ret = rocker_port_phys_enable_read(r);
1162         break;
1163     case ROCKER_SWITCH_ID:
1164         ret = r->switch_id;
1165         break;
1166     default:
1167         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1168         ret = 0;
1169         break;
1170     }
1171     return ret;
1172 }
1173 
1174 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1175 {
1176     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1177             rocker_reg_name(opaque, addr), addr, size);
1178 
1179     switch (size) {
1180     case 4:
1181         return rocker_io_readl(opaque, addr);
1182     case 8:
1183         return rocker_io_readq(opaque, addr);
1184     }
1185 
1186     return -1;
1187 }
1188 
/*
 * Access callbacks for the "rocker-mmio" region registered on BAR0 in
 * pci_rocker_realize().  Both the accepted (.valid) and the implemented
 * (.impl) access sizes span 4 to 8 bytes, matching the 4- and 8-byte
 * cases dispatched by rocker_mmio_read() and rocker_mmio_write().
 */
static const MemoryRegionOps rocker_mmio_ops = {
    .read = rocker_mmio_read,
    .write = rocker_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
};
1202 
1203 static void rocker_msix_vectors_unuse(Rocker *r,
1204                                       unsigned int num_vectors)
1205 {
1206     PCIDevice *dev = PCI_DEVICE(r);
1207     int i;
1208 
1209     for (i = 0; i < num_vectors; i++) {
1210         msix_vector_unuse(dev, i);
1211     }
1212 }
1213 
1214 static int rocker_msix_vectors_use(Rocker *r,
1215                                    unsigned int num_vectors)
1216 {
1217     PCIDevice *dev = PCI_DEVICE(r);
1218     int err;
1219     int i;
1220 
1221     for (i = 0; i < num_vectors; i++) {
1222         err = msix_vector_use(dev, i);
1223         if (err) {
1224             goto rollback;
1225         }
1226     }
1227     return 0;
1228 
1229 rollback:
1230     rocker_msix_vectors_unuse(r, i);
1231     return err;
1232 }
1233 
1234 static int rocker_msix_init(Rocker *r, Error **errp)
1235 {
1236     PCIDevice *dev = PCI_DEVICE(r);
1237     int err;
1238 
1239     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1240                     &r->msix_bar,
1241                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1242                     &r->msix_bar,
1243                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1244                     0, errp);
1245     if (err) {
1246         return err;
1247     }
1248 
1249     err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1250     if (err) {
1251         goto err_msix_vectors_use;
1252     }
1253 
1254     return 0;
1255 
1256 err_msix_vectors_use:
1257     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1258     return err;
1259 }
1260 
1261 static void rocker_msix_uninit(Rocker *r)
1262 {
1263     PCIDevice *dev = PCI_DEVICE(r);
1264 
1265     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1266     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1267 }
1268 
1269 static World *rocker_world_type_by_name(Rocker *r, const char *name)
1270 {
1271     int i;
1272 
1273     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1274         if (strcmp(name, world_name(r->worlds[i])) == 0) {
1275             return r->worlds[i];
1276         }
1277     }
1278     return NULL;
1279 }
1280 
1281 static void pci_rocker_realize(PCIDevice *dev, Error **errp)
1282 {
1283     Rocker *r = ROCKER(dev);
1284     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1285     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1286     static int sw_index;
1287     int i, err = 0;
1288 
1289     /* allocate worlds */
1290 
1291     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1292 
1293     if (!r->world_name) {
1294         r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1295     }
1296 
1297     r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1298     if (!r->world_dflt) {
1299         error_setg(errp,
1300                 "invalid argument requested world %s does not exist",
1301                 r->world_name);
1302         goto err_world_type_by_name;
1303     }
1304 
1305     /* set up memory-mapped region at BAR0 */
1306 
1307     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1308                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1309     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1310                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1311 
1312     /* set up memory-mapped region for MSI-X */
1313 
1314     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1315                        ROCKER_PCI_MSIX_BAR_SIZE);
1316     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1317                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1318 
1319     /* MSI-X init */
1320 
1321     err = rocker_msix_init(r, errp);
1322     if (err) {
1323         goto err_msix_init;
1324     }
1325 
1326     /* validate switch properties */
1327 
1328     if (!r->name) {
1329         r->name = g_strdup(TYPE_ROCKER);
1330     }
1331 
1332     if (rocker_find(r->name)) {
1333         error_setg(errp, "%s already exists", r->name);
1334         goto err_duplicate;
1335     }
1336 
1337     /* Rocker name is passed in port name requests to OS with the intention
1338      * that the name is used in interface names. Limit the length of the
1339      * rocker name to avoid naming problems in the OS. Also, adding the
1340      * port number as p# and unganged breakout b#, where # is at most 2
1341      * digits, so leave room for it too (-1 for string terminator, -3 for
1342      * p# and -3 for b#)
1343      */
1344 #define ROCKER_IFNAMSIZ 16
1345 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1346     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1347         error_setg(errp,
1348                 "name too long; please shorten to at most %d chars",
1349                 MAX_ROCKER_NAME_LEN);
1350         goto err_name_too_long;
1351     }
1352 
1353     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1354         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1355         r->fp_start_macaddr.a[4] += (sw_index++);
1356     }
1357 
1358     if (!r->switch_id) {
1359         memcpy(&r->switch_id, &r->fp_start_macaddr,
1360                sizeof(r->fp_start_macaddr));
1361     }
1362 
1363     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1364         r->fp_ports = ROCKER_FP_PORTS_MAX;
1365     }
1366 
1367     r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1368 
1369     /* Rings are ordered like this:
1370      * - command ring
1371      * - event ring
1372      * - port0 tx ring
1373      * - port0 rx ring
1374      * - port1 tx ring
1375      * - port1 rx ring
1376      * .....
1377      */
1378 
1379     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1380         DescRing *ring = desc_ring_alloc(r, i);
1381 
1382         if (i == ROCKER_RING_CMD) {
1383             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1384         } else if (i == ROCKER_RING_EVENT) {
1385             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1386         } else if (i % 2 == 0) {
1387             desc_ring_set_consume(ring, tx_consume,
1388                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1389         } else if (i % 2 == 1) {
1390             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1391         }
1392 
1393         r->rings[i] = ring;
1394     }
1395 
1396     for (i = 0; i < r->fp_ports; i++) {
1397         FpPort *port =
1398             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1399                           i, &r->fp_ports_peers[i]);
1400 
1401         r->fp_port[i] = port;
1402         fp_port_set_world(port, r->world_dflt);
1403     }
1404 
1405     QLIST_INSERT_HEAD(&rockers, r, next);
1406 
1407     return;
1408 
1409 err_name_too_long:
1410 err_duplicate:
1411     rocker_msix_uninit(r);
1412 err_msix_init:
1413     object_unparent(OBJECT(&r->msix_bar));
1414     object_unparent(OBJECT(&r->mmio));
1415 err_world_type_by_name:
1416     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1417         if (r->worlds[i]) {
1418             world_free(r->worlds[i]);
1419         }
1420     }
1421 }
1422 
1423 static void pci_rocker_uninit(PCIDevice *dev)
1424 {
1425     Rocker *r = ROCKER(dev);
1426     int i;
1427 
1428     QLIST_REMOVE(r, next);
1429 
1430     for (i = 0; i < r->fp_ports; i++) {
1431         FpPort *port = r->fp_port[i];
1432 
1433         fp_port_free(port);
1434         r->fp_port[i] = NULL;
1435     }
1436 
1437     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1438         if (r->rings[i]) {
1439             desc_ring_free(r->rings[i]);
1440         }
1441     }
1442     g_free(r->rings);
1443 
1444     rocker_msix_uninit(r);
1445     object_unparent(OBJECT(&r->msix_bar));
1446     object_unparent(OBJECT(&r->mmio));
1447 
1448     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1449         if (r->worlds[i]) {
1450             world_free(r->worlds[i]);
1451         }
1452     }
1453     g_free(r->fp_ports_peers);
1454 }
1455 
1456 static void rocker_reset(DeviceState *dev)
1457 {
1458     Rocker *r = ROCKER(dev);
1459     int i;
1460 
1461     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1462         if (r->worlds[i]) {
1463             world_reset(r->worlds[i]);
1464         }
1465     }
1466     for (i = 0; i < r->fp_ports; i++) {
1467         fp_port_reset(r->fp_port[i]);
1468         fp_port_set_world(r->fp_port[i], r->world_dflt);
1469     }
1470 
1471     r->test_reg = 0;
1472     r->test_reg64 = 0;
1473     r->test_dma_addr = 0;
1474     r->test_dma_size = 0;
1475 
1476     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1477         desc_ring_reset(r->rings[i]);
1478     }
1479 
1480     DPRINTF("Reset done\n");
1481 }
1482 
/*
 * User-configurable properties of the rocker device.
 *
 * Properties left unset are defaulted in pci_rocker_realize(): "name"
 * falls back to TYPE_ROCKER, "world" to the OF-DPA world's name, an
 * all-zero "fp_start_macaddr" to a built-in default address, and a zero
 * "switch_id" is derived from the start MAC address.
 */
static Property rocker_properties[] = {
    /* switch name; realize rejects duplicate and over-long names */
    DEFINE_PROP_STRING("name", Rocker, name),
    /* name of the default world; must match an existing world */
    DEFINE_PROP_STRING("world", Rocker, world_name),
    /* MAC address of front-panel port 0 */
    DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
                        fp_start_macaddr),
    DEFINE_PROP_UINT64("switch_id", Rocker,
                       switch_id, 0),
    /* array of netdev peers, one per front-panel port; fp_ports is the
     * field paired with it as the element count */
    DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
                      fp_ports_peers, qdev_prop_netdev, NICPeers),
    DEFINE_PROP_END_OF_LIST(),
};
1494 
/*
 * Migration description of the device: it carries no fields and flags
 * the device as unmigratable.
 */
static const VMStateDescription rocker_vmsd = {
    .name = TYPE_ROCKER,
    .unmigratable = 1,
};
1499 
1500 static void rocker_class_init(ObjectClass *klass, void *data)
1501 {
1502     DeviceClass *dc = DEVICE_CLASS(klass);
1503     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1504 
1505     k->realize = pci_rocker_realize;
1506     k->exit = pci_rocker_uninit;
1507     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1508     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1509     k->revision = ROCKER_PCI_REVISION;
1510     k->class_id = PCI_CLASS_NETWORK_OTHER;
1511     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1512     dc->desc = "Rocker Switch";
1513     dc->reset = rocker_reset;
1514     device_class_set_props(dc, rocker_properties);
1515     dc->vmsd = &rocker_vmsd;
1516 }
1517 
/*
 * Type description of the rocker device: a TYPE_PCI_DEVICE subclass with
 * Rocker as its instance structure, set up by rocker_class_init() and
 * declaring the conventional-PCI-device interface.
 */
static const TypeInfo rocker_info = {
    .name          = TYPE_ROCKER,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(Rocker),
    .class_init    = rocker_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
1528 
/* Register the rocker device type described by rocker_info. */
static void rocker_register_types(void)
{
    type_register_static(&rocker_info);
}

/* Hook the registration function into QEMU's type initialization. */
type_init(rocker_register_types)
1535