xref: /qemu/hw/net/rocker/rocker.c (revision abff1abf)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/pci/pci.h"
20 #include "hw/qdev-properties.h"
21 #include "migration/vmstate.h"
22 #include "hw/pci/msix.h"
23 #include "net/net.h"
24 #include "net/eth.h"
25 #include "qapi/error.h"
26 #include "qapi/qapi-commands-rocker.h"
27 #include "qemu/iov.h"
28 #include "qemu/module.h"
29 #include "qemu/bitops.h"
30 #include "qemu/log.h"
31 
32 #include "rocker.h"
33 #include "rocker_hw.h"
34 #include "rocker_fp.h"
35 #include "rocker_desc.h"
36 #include "rocker_tlv.h"
37 #include "rocker_world.h"
38 #include "rocker_of_dpa.h"
39 
/*
 * Per-device state of one emulated rocker switch.
 *
 * Instances are linked together through @next on the file-scope
 * "rockers" list so that they can be looked up by name.
 */
struct rocker {
    /* private */
    PCIDevice parent_obj;
    /* public */

    MemoryRegion mmio;
    MemoryRegion msix_bar;

    /* switch configuration */
    char *name;                  /* switch name */
    char *world_name;            /* world name */
    uint32_t fp_ports;           /* front-panel port count */
    NICPeers *fp_ports_peers;
    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
    uint64_t switch_id;          /* switch id */

    /* front-panel ports; code in this file iterates entries [0, fp_ports) */
    FpPort *fp_port[ROCKER_FP_PORTS_MAX];

    /* register backings */
    uint32_t test_reg;
    uint64_t test_reg64;
    dma_addr_t test_dma_addr;
    uint32_t test_dma_size;
    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */

    /*
     * desc rings, indexed by ring number: command ring, event ring,
     * then a tx/rx ring pair per front-panel port
     */
    DescRing **rings;

    /* switch worlds, indexed by enum rocker_world_type */
    World *worlds[ROCKER_WORLD_TYPE_MAX];
    World *world_dflt;

    /* linkage on the file-scope list of rocker devices */
    QLIST_ENTRY(rocker) next;
};
75 
/* Type name string used for the rocker device */
#define TYPE_ROCKER "rocker"

/* Convert an object pointer to Rocker * via OBJECT_CHECK against TYPE_ROCKER */
#define ROCKER(obj) \
    OBJECT_CHECK(Rocker, (obj), TYPE_ROCKER)

/* List of rocker devices; walked by rocker_find() to resolve a name */
static QLIST_HEAD(, rocker) rockers;
82 
83 Rocker *rocker_find(const char *name)
84 {
85     Rocker *r;
86 
87     QLIST_FOREACH(r, &rockers, next)
88         if (strcmp(r->name, name) == 0) {
89             return r;
90         }
91 
92     return NULL;
93 }
94 
95 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
96 {
97     if (type < ROCKER_WORLD_TYPE_MAX) {
98         return r->worlds[type];
99     }
100     return NULL;
101 }
102 
103 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
104 {
105     RockerSwitch *rocker;
106     Rocker *r;
107 
108     r = rocker_find(name);
109     if (!r) {
110         error_setg(errp, "rocker %s not found", name);
111         return NULL;
112     }
113 
114     rocker = g_new0(RockerSwitch, 1);
115     rocker->name = g_strdup(r->name);
116     rocker->id = r->switch_id;
117     rocker->ports = r->fp_ports;
118 
119     return rocker;
120 }
121 
122 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
123 {
124     RockerPortList *list = NULL;
125     Rocker *r;
126     int i;
127 
128     r = rocker_find(name);
129     if (!r) {
130         error_setg(errp, "rocker %s not found", name);
131         return NULL;
132     }
133 
134     for (i = r->fp_ports - 1; i >= 0; i--) {
135         RockerPortList *info = g_malloc0(sizeof(*info));
136         info->value = g_malloc0(sizeof(*info->value));
137         struct fp_port *port = r->fp_port[i];
138 
139         fp_port_get_info(port, info);
140         info->next = list;
141         list = info;
142     }
143 
144     return list;
145 }
146 
147 uint32_t rocker_fp_ports(Rocker *r)
148 {
149     return r->fp_ports;
150 }
151 
152 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
153                                             DescRing *ring)
154 {
155     return (desc_ring_index(ring) - 2) / 2 + 1;
156 }
157 
158 static int tx_consume(Rocker *r, DescInfo *info)
159 {
160     PCIDevice *dev = PCI_DEVICE(r);
161     char *buf = desc_get_buf(info, true);
162     RockerTlv *tlv_frag;
163     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
164     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
165     uint32_t pport;
166     uint32_t port;
167     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
168     uint16_t tx_l3_csum_off = 0;
169     uint16_t tx_tso_mss = 0;
170     uint16_t tx_tso_hdr_len = 0;
171     int iovcnt = 0;
172     int err = ROCKER_OK;
173     int rem;
174     int i;
175 
176     if (!buf) {
177         return -ROCKER_ENXIO;
178     }
179 
180     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
181 
182     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
183         return -ROCKER_EINVAL;
184     }
185 
186     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
187     if (!fp_port_from_pport(pport, &port)) {
188         return -ROCKER_EINVAL;
189     }
190 
191     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
192         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
193     }
194 
195     switch (tx_offload) {
196     case ROCKER_TX_OFFLOAD_L3_CSUM:
197         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
198             return -ROCKER_EINVAL;
199         }
200         break;
201     case ROCKER_TX_OFFLOAD_TSO:
202         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
203             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
204             return -ROCKER_EINVAL;
205         }
206         break;
207     }
208 
209     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
210         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
211         qemu_log_mask(LOG_UNIMP, "rocker %s: L3 not implemented"
212                                  " (cksum off: %u)\n",
213                       __func__, tx_l3_csum_off);
214     }
215 
216     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
217         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
218         qemu_log_mask(LOG_UNIMP, "rocker %s: TSO not implemented (MSS: %u)\n",
219                       __func__, tx_tso_mss);
220     }
221 
222     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
223         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
224         qemu_log_mask(LOG_UNIMP, "rocker %s: TSO not implemented"
225                                  " (hdr length: %u)\n",
226                       __func__, tx_tso_hdr_len);
227     }
228 
229     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
230         hwaddr frag_addr;
231         uint16_t frag_len;
232 
233         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
234             err = -ROCKER_EINVAL;
235             goto err_bad_attr;
236         }
237 
238         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
239 
240         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
241             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
242             err = -ROCKER_EINVAL;
243             goto err_bad_attr;
244         }
245 
246         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
247         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
248 
249         if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
250             goto err_too_many_frags;
251         }
252         iov[iovcnt].iov_len = frag_len;
253         iov[iovcnt].iov_base = g_malloc(frag_len);
254 
255         pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
256                      iov[iovcnt].iov_len);
257 
258         iovcnt++;
259     }
260 
261     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
262 
263 err_too_many_frags:
264 err_bad_attr:
265     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
266         g_free(iov[i].iov_base);
267     }
268 
269     return err;
270 }
271 
272 static int cmd_get_port_settings(Rocker *r,
273                                  DescInfo *info, char *buf,
274                                  RockerTlv *cmd_info_tlv)
275 {
276     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
277     RockerTlv *nest;
278     FpPort *fp_port;
279     uint32_t pport;
280     uint32_t port;
281     uint32_t speed;
282     uint8_t duplex;
283     uint8_t autoneg;
284     uint8_t learning;
285     char *phys_name;
286     MACAddr macaddr;
287     enum rocker_world_type mode;
288     size_t tlv_size;
289     int pos;
290     int err;
291 
292     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
293                             cmd_info_tlv);
294 
295     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
296         return -ROCKER_EINVAL;
297     }
298 
299     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
300     if (!fp_port_from_pport(pport, &port)) {
301         return -ROCKER_EINVAL;
302     }
303     fp_port = r->fp_port[port];
304 
305     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
306     if (err) {
307         return err;
308     }
309 
310     fp_port_get_macaddr(fp_port, &macaddr);
311     mode = world_type(fp_port_get_world(fp_port));
312     learning = fp_port_get_learning(fp_port);
313     phys_name = fp_port_get_name(fp_port);
314 
315     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
316                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
317                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
318                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
319                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
320                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
321                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
322                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
323                rocker_tlv_total_size(strlen(phys_name));
324 
325     if (tlv_size > desc_buf_size(info)) {
326         return -ROCKER_EMSGSIZE;
327     }
328 
329     pos = 0;
330     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
331     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
332     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
333     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
334     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
335     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
336                    sizeof(macaddr.a), macaddr.a);
337     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
338     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
339                       learning);
340     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
341                    strlen(phys_name), phys_name);
342     rocker_tlv_nest_end(buf, &pos, nest);
343 
344     return desc_set_buf(info, tlv_size);
345 }
346 
347 static int cmd_set_port_settings(Rocker *r,
348                                  RockerTlv *cmd_info_tlv)
349 {
350     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
351     FpPort *fp_port;
352     uint32_t pport;
353     uint32_t port;
354     uint32_t speed;
355     uint8_t duplex;
356     uint8_t autoneg;
357     uint8_t learning;
358     MACAddr macaddr;
359     enum rocker_world_type mode;
360     int err;
361 
362     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
363                             cmd_info_tlv);
364 
365     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
366         return -ROCKER_EINVAL;
367     }
368 
369     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
370     if (!fp_port_from_pport(pport, &port)) {
371         return -ROCKER_EINVAL;
372     }
373     fp_port = r->fp_port[port];
374 
375     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
376         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
377         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
378 
379         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
380         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
381         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
382 
383         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
384         if (err) {
385             return err;
386         }
387     }
388 
389     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
390         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
391             sizeof(macaddr.a)) {
392             return -ROCKER_EINVAL;
393         }
394         memcpy(macaddr.a,
395                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
396                sizeof(macaddr.a));
397         fp_port_set_macaddr(fp_port, &macaddr);
398     }
399 
400     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
401         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
402         if (mode >= ROCKER_WORLD_TYPE_MAX) {
403             return -ROCKER_EINVAL;
404         }
405         /* We don't support world change. */
406         if (!fp_port_check_world(fp_port, r->worlds[mode])) {
407             return -ROCKER_EINVAL;
408         }
409     }
410 
411     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
412         learning =
413             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
414         fp_port_set_learning(fp_port, learning);
415     }
416 
417     return ROCKER_OK;
418 }
419 
420 static int cmd_consume(Rocker *r, DescInfo *info)
421 {
422     char *buf = desc_get_buf(info, false);
423     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
424     RockerTlv *info_tlv;
425     World *world;
426     uint16_t cmd;
427     int err;
428 
429     if (!buf) {
430         return -ROCKER_ENXIO;
431     }
432 
433     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
434 
435     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
436         return -ROCKER_EINVAL;
437     }
438 
439     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
440     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
441 
442     /* This might be reworked to something like this:
443      * Every world will have an array of command handlers from
444      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
445      * up to each world to implement whatever command it want.
446      * It can reference "generic" commands as cmd_set_port_settings or
447      * cmd_get_port_settings
448      */
449 
450     switch (cmd) {
451     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
452     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
453     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
454     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
455     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
456     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
457     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
458     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
459         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
460         err = world_do_cmd(world, info, buf, cmd, info_tlv);
461         break;
462     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
463         err = cmd_get_port_settings(r, info, buf, info_tlv);
464         break;
465     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
466         err = cmd_set_port_settings(r, info_tlv);
467         break;
468     default:
469         err = -ROCKER_EINVAL;
470         break;
471     }
472 
473     return err;
474 }
475 
476 static void rocker_msix_irq(Rocker *r, unsigned vector)
477 {
478     PCIDevice *dev = PCI_DEVICE(r);
479 
480     DPRINTF("MSI-X notify request for vector %d\n", vector);
481     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
482         DPRINTF("incorrect vector %d\n", vector);
483         return;
484     }
485     msix_notify(dev, vector);
486 }
487 
488 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
489 {
490     DescRing *ring = r->rings[ROCKER_RING_EVENT];
491     DescInfo *info = desc_ring_fetch_desc(ring);
492     RockerTlv *nest;
493     char *buf;
494     size_t tlv_size;
495     int pos;
496     int err;
497 
498     if (!info) {
499         return -ROCKER_ENOBUFS;
500     }
501 
502     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
503                rocker_tlv_total_size(0) +                 /* nest */
504                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
505                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
506 
507     if (tlv_size > desc_buf_size(info)) {
508         err = -ROCKER_EMSGSIZE;
509         goto err_too_big;
510     }
511 
512     buf = desc_get_buf(info, false);
513     if (!buf) {
514         err = -ROCKER_ENOMEM;
515         goto err_no_mem;
516     }
517 
518     pos = 0;
519     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
520                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
521     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
522     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
523     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
524                       link_up ? 1 : 0);
525     rocker_tlv_nest_end(buf, &pos, nest);
526 
527     err = desc_set_buf(info, tlv_size);
528 
529 err_too_big:
530 err_no_mem:
531     if (desc_ring_post_desc(ring, err)) {
532         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
533     }
534 
535     return err;
536 }
537 
538 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
539                                uint16_t vlan_id)
540 {
541     DescRing *ring = r->rings[ROCKER_RING_EVENT];
542     DescInfo *info;
543     FpPort *fp_port;
544     uint32_t port;
545     RockerTlv *nest;
546     char *buf;
547     size_t tlv_size;
548     int pos;
549     int err;
550 
551     if (!fp_port_from_pport(pport, &port)) {
552         return -ROCKER_EINVAL;
553     }
554     fp_port = r->fp_port[port];
555     if (!fp_port_get_learning(fp_port)) {
556         return ROCKER_OK;
557     }
558 
559     info = desc_ring_fetch_desc(ring);
560     if (!info) {
561         return -ROCKER_ENOBUFS;
562     }
563 
564     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
565                rocker_tlv_total_size(0) +                 /* nest */
566                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
567                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
568                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
569 
570     if (tlv_size > desc_buf_size(info)) {
571         err = -ROCKER_EMSGSIZE;
572         goto err_too_big;
573     }
574 
575     buf = desc_get_buf(info, false);
576     if (!buf) {
577         err = -ROCKER_ENOMEM;
578         goto err_no_mem;
579     }
580 
581     pos = 0;
582     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
583                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
584     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
585     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
586     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
587     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
588     rocker_tlv_nest_end(buf, &pos, nest);
589 
590     err = desc_set_buf(info, tlv_size);
591 
592 err_too_big:
593 err_no_mem:
594     if (desc_ring_post_desc(ring, err)) {
595         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
596     }
597 
598     return err;
599 }
600 
601 static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
602                                                      uint32_t pport)
603 {
604     return r->rings[(pport - 1) * 2 + 3];
605 }
606 
607 int rx_produce(World *world, uint32_t pport,
608                const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
609 {
610     Rocker *r = world_rocker(world);
611     PCIDevice *dev = (PCIDevice *)r;
612     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
613     DescInfo *info = desc_ring_fetch_desc(ring);
614     char *data;
615     size_t data_size = iov_size(iov, iovcnt);
616     char *buf;
617     uint16_t rx_flags = 0;
618     uint16_t rx_csum = 0;
619     size_t tlv_size;
620     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
621     hwaddr frag_addr;
622     uint16_t frag_max_len;
623     int pos;
624     int err;
625 
626     if (!info) {
627         return -ROCKER_ENOBUFS;
628     }
629 
630     buf = desc_get_buf(info, false);
631     if (!buf) {
632         err = -ROCKER_ENXIO;
633         goto out;
634     }
635     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
636 
637     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
638         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
639         err = -ROCKER_EINVAL;
640         goto out;
641     }
642 
643     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
644     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
645 
646     if (data_size > frag_max_len) {
647         err = -ROCKER_EMSGSIZE;
648         goto out;
649     }
650 
651     if (copy_to_cpu) {
652         rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
653     }
654 
655     /* XXX calc rx flags/csum */
656 
657     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
658                rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
659                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
660                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
661                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
662 
663     if (tlv_size > desc_buf_size(info)) {
664         err = -ROCKER_EMSGSIZE;
665         goto out;
666     }
667 
668     /* TODO:
669      * iov dma write can be optimized in similar way e1000 does it in
670      * e1000_receive_iov. But maybe if would make sense to introduce
671      * generic helper iov_dma_write.
672      */
673 
674     data = g_malloc(data_size);
675 
676     iov_to_buf(iov, iovcnt, 0, data, data_size);
677     pci_dma_write(dev, frag_addr, data, data_size);
678     g_free(data);
679 
680     pos = 0;
681     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
682     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
683     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
684     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
685     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
686 
687     err = desc_set_buf(info, tlv_size);
688 
689 out:
690     if (desc_ring_post_desc(ring, err)) {
691         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
692     }
693 
694     return err;
695 }
696 
697 int rocker_port_eg(Rocker *r, uint32_t pport,
698                    const struct iovec *iov, int iovcnt)
699 {
700     FpPort *fp_port;
701     uint32_t port;
702 
703     if (!fp_port_from_pport(pport, &port)) {
704         return -ROCKER_EINVAL;
705     }
706 
707     fp_port = r->fp_port[port];
708 
709     return fp_port_eg(fp_port, iov, iovcnt);
710 }
711 
712 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
713 {
714     PCIDevice *dev = PCI_DEVICE(r);
715     char *buf;
716     int i;
717 
718     buf = g_malloc(r->test_dma_size);
719 
720     switch (val) {
721     case ROCKER_TEST_DMA_CTRL_CLEAR:
722         memset(buf, 0, r->test_dma_size);
723         break;
724     case ROCKER_TEST_DMA_CTRL_FILL:
725         memset(buf, 0x96, r->test_dma_size);
726         break;
727     case ROCKER_TEST_DMA_CTRL_INVERT:
728         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
729         for (i = 0; i < r->test_dma_size; i++) {
730             buf[i] = ~buf[i];
731         }
732         break;
733     default:
734         DPRINTF("not test dma control val=0x%08x\n", val);
735         goto err_out;
736     }
737     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
738 
739     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
740 
741 err_out:
742     g_free(buf);
743 }
744 
745 static void rocker_reset(DeviceState *dev);
746 
747 static void rocker_control(Rocker *r, uint32_t val)
748 {
749     if (val & ROCKER_CONTROL_RESET) {
750         rocker_reset(DEVICE(r));
751     }
752 }
753 
754 static int rocker_pci_ring_count(Rocker *r)
755 {
756     /* There are:
757      * - command ring
758      * - event ring
759      * - tx and rx ring per each port
760      */
761     return 2 + (2 * r->fp_ports);
762 }
763 
764 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
765 {
766     hwaddr start = ROCKER_DMA_DESC_BASE;
767     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
768 
769     return addr >= start && addr < end;
770 }
771 
772 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
773 {
774     int i;
775     bool old_enabled;
776     bool new_enabled;
777     FpPort *fp_port;
778 
779     for (i = 0; i < r->fp_ports; i++) {
780         fp_port = r->fp_port[i];
781         old_enabled = fp_port_enabled(fp_port);
782         new_enabled = (new >> (i + 1)) & 0x1;
783         if (new_enabled == old_enabled) {
784             continue;
785         }
786         if (new_enabled) {
787             fp_port_enable(r->fp_port[i]);
788         } else {
789             fp_port_disable(r->fp_port[i]);
790         }
791     }
792 }
793 
794 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
795 {
796     Rocker *r = opaque;
797 
798     if (rocker_addr_is_desc_reg(r, addr)) {
799         unsigned index = ROCKER_RING_INDEX(addr);
800         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
801 
802         switch (offset) {
803         case ROCKER_DMA_DESC_ADDR_OFFSET:
804             r->lower32 = (uint64_t)val;
805             break;
806         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
807             desc_ring_set_base_addr(r->rings[index],
808                                     ((uint64_t)val) << 32 | r->lower32);
809             r->lower32 = 0;
810             break;
811         case ROCKER_DMA_DESC_SIZE_OFFSET:
812             desc_ring_set_size(r->rings[index], val);
813             break;
814         case ROCKER_DMA_DESC_HEAD_OFFSET:
815             if (desc_ring_set_head(r->rings[index], val)) {
816                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
817             }
818             break;
819         case ROCKER_DMA_DESC_CTRL_OFFSET:
820             desc_ring_set_ctrl(r->rings[index], val);
821             break;
822         case ROCKER_DMA_DESC_CREDITS_OFFSET:
823             if (desc_ring_ret_credits(r->rings[index], val)) {
824                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
825             }
826             break;
827         default:
828             DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
829                     " val=0x%08x (ring %d, addr=0x%02x)\n",
830                     addr, val, index, offset);
831             break;
832         }
833         return;
834     }
835 
836     switch (addr) {
837     case ROCKER_TEST_REG:
838         r->test_reg = val;
839         break;
840     case ROCKER_TEST_REG64:
841     case ROCKER_TEST_DMA_ADDR:
842     case ROCKER_PORT_PHYS_ENABLE:
843         r->lower32 = (uint64_t)val;
844         break;
845     case ROCKER_TEST_REG64 + 4:
846         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
847         r->lower32 = 0;
848         break;
849     case ROCKER_TEST_IRQ:
850         rocker_msix_irq(r, val);
851         break;
852     case ROCKER_TEST_DMA_SIZE:
853         r->test_dma_size = val & 0xFFFF;
854         break;
855     case ROCKER_TEST_DMA_ADDR + 4:
856         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
857         r->lower32 = 0;
858         break;
859     case ROCKER_TEST_DMA_CTRL:
860         rocker_test_dma_ctrl(r, val);
861         break;
862     case ROCKER_CONTROL:
863         rocker_control(r, val);
864         break;
865     case ROCKER_PORT_PHYS_ENABLE + 4:
866         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
867         r->lower32 = 0;
868         break;
869     default:
870         DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
871                 " val=0x%08x\n", addr, val);
872         break;
873     }
874 }
875 
876 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
877 {
878     Rocker *r = opaque;
879 
880     if (rocker_addr_is_desc_reg(r, addr)) {
881         unsigned index = ROCKER_RING_INDEX(addr);
882         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
883 
884         switch (offset) {
885         case ROCKER_DMA_DESC_ADDR_OFFSET:
886             desc_ring_set_base_addr(r->rings[index], val);
887             break;
888         default:
889             DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
890                     " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
891                     addr, val, index, offset);
892             break;
893         }
894         return;
895     }
896 
897     switch (addr) {
898     case ROCKER_TEST_REG64:
899         r->test_reg64 = val;
900         break;
901     case ROCKER_TEST_DMA_ADDR:
902         r->test_dma_addr = val;
903         break;
904     case ROCKER_PORT_PHYS_ENABLE:
905         rocker_port_phys_enable_write(r, val);
906         break;
907     default:
908         DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
909                 " val=0x" TARGET_FMT_plx "\n", addr, val);
910         break;
911     }
912 }
913 
914 #ifdef DEBUG_ROCKER
915 #define regname(reg) case (reg): return #reg
916 static const char *rocker_reg_name(void *opaque, hwaddr addr)
917 {
918     Rocker *r = opaque;
919 
920     if (rocker_addr_is_desc_reg(r, addr)) {
921         unsigned index = ROCKER_RING_INDEX(addr);
922         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
923         static char buf[100];
924         char ring_name[10];
925 
926         switch (index) {
927         case 0:
928             sprintf(ring_name, "cmd");
929             break;
930         case 1:
931             sprintf(ring_name, "event");
932             break;
933         default:
934             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
935                     (index - 2) / 2);
936         }
937 
938         switch (offset) {
939         case ROCKER_DMA_DESC_ADDR_OFFSET:
940             sprintf(buf, "Ring[%s] ADDR", ring_name);
941             return buf;
942         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
943             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
944             return buf;
945         case ROCKER_DMA_DESC_SIZE_OFFSET:
946             sprintf(buf, "Ring[%s] SIZE", ring_name);
947             return buf;
948         case ROCKER_DMA_DESC_HEAD_OFFSET:
949             sprintf(buf, "Ring[%s] HEAD", ring_name);
950             return buf;
951         case ROCKER_DMA_DESC_TAIL_OFFSET:
952             sprintf(buf, "Ring[%s] TAIL", ring_name);
953             return buf;
954         case ROCKER_DMA_DESC_CTRL_OFFSET:
955             sprintf(buf, "Ring[%s] CTRL", ring_name);
956             return buf;
957         case ROCKER_DMA_DESC_CREDITS_OFFSET:
958             sprintf(buf, "Ring[%s] CREDITS", ring_name);
959             return buf;
960         default:
961             sprintf(buf, "Ring[%s] ???", ring_name);
962             return buf;
963         }
964     } else {
965         switch (addr) {
966             regname(ROCKER_BOGUS_REG0);
967             regname(ROCKER_BOGUS_REG1);
968             regname(ROCKER_BOGUS_REG2);
969             regname(ROCKER_BOGUS_REG3);
970             regname(ROCKER_TEST_REG);
971             regname(ROCKER_TEST_REG64);
972             regname(ROCKER_TEST_REG64+4);
973             regname(ROCKER_TEST_IRQ);
974             regname(ROCKER_TEST_DMA_ADDR);
975             regname(ROCKER_TEST_DMA_ADDR+4);
976             regname(ROCKER_TEST_DMA_SIZE);
977             regname(ROCKER_TEST_DMA_CTRL);
978             regname(ROCKER_CONTROL);
979             regname(ROCKER_PORT_PHYS_COUNT);
980             regname(ROCKER_PORT_PHYS_LINK_STATUS);
981             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
982             regname(ROCKER_PORT_PHYS_ENABLE);
983             regname(ROCKER_PORT_PHYS_ENABLE+4);
984             regname(ROCKER_SWITCH_ID);
985             regname(ROCKER_SWITCH_ID+4);
986         }
987     }
988     return "???";
989 }
990 #else
991 static const char *rocker_reg_name(void *opaque, hwaddr addr)
992 {
993     return NULL;
994 }
995 #endif
996 
997 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
998                               unsigned size)
999 {
1000     DPRINTF("Write %s addr " TARGET_FMT_plx
1001             ", size %u, val " TARGET_FMT_plx "\n",
1002             rocker_reg_name(opaque, addr), addr, size, val);
1003 
1004     switch (size) {
1005     case 4:
1006         rocker_io_writel(opaque, addr, val);
1007         break;
1008     case 8:
1009         rocker_io_writeq(opaque, addr, val);
1010         break;
1011     }
1012 }
1013 
1014 static uint64_t rocker_port_phys_link_status(Rocker *r)
1015 {
1016     int i;
1017     uint64_t status = 0;
1018 
1019     for (i = 0; i < r->fp_ports; i++) {
1020         FpPort *port = r->fp_port[i];
1021 
1022         if (fp_port_get_link_up(port)) {
1023             status |= 1 << (i + 1);
1024         }
1025     }
1026     return status;
1027 }
1028 
1029 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1030 {
1031     int i;
1032     uint64_t ret = 0;
1033 
1034     for (i = 0; i < r->fp_ports; i++) {
1035         FpPort *port = r->fp_port[i];
1036 
1037         if (fp_port_enabled(port)) {
1038             ret |= 1 << (i + 1);
1039         }
1040     }
1041     return ret;
1042 }
1043 
1044 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1045 {
1046     Rocker *r = opaque;
1047     uint32_t ret;
1048 
1049     if (rocker_addr_is_desc_reg(r, addr)) {
1050         unsigned index = ROCKER_RING_INDEX(addr);
1051         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1052 
1053         switch (offset) {
1054         case ROCKER_DMA_DESC_ADDR_OFFSET:
1055             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1056             break;
1057         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1058             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1059             break;
1060         case ROCKER_DMA_DESC_SIZE_OFFSET:
1061             ret = desc_ring_get_size(r->rings[index]);
1062             break;
1063         case ROCKER_DMA_DESC_HEAD_OFFSET:
1064             ret = desc_ring_get_head(r->rings[index]);
1065             break;
1066         case ROCKER_DMA_DESC_TAIL_OFFSET:
1067             ret = desc_ring_get_tail(r->rings[index]);
1068             break;
1069         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1070             ret = desc_ring_get_credits(r->rings[index]);
1071             break;
1072         default:
1073             DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1074                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1075             ret = 0;
1076             break;
1077         }
1078         return ret;
1079     }
1080 
1081     switch (addr) {
1082     case ROCKER_BOGUS_REG0:
1083     case ROCKER_BOGUS_REG1:
1084     case ROCKER_BOGUS_REG2:
1085     case ROCKER_BOGUS_REG3:
1086         ret = 0xDEADBABE;
1087         break;
1088     case ROCKER_TEST_REG:
1089         ret = r->test_reg * 2;
1090         break;
1091     case ROCKER_TEST_REG64:
1092         ret = (uint32_t)(r->test_reg64 * 2);
1093         break;
1094     case ROCKER_TEST_REG64 + 4:
1095         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1096         break;
1097     case ROCKER_TEST_DMA_SIZE:
1098         ret = r->test_dma_size;
1099         break;
1100     case ROCKER_TEST_DMA_ADDR:
1101         ret = (uint32_t)r->test_dma_addr;
1102         break;
1103     case ROCKER_TEST_DMA_ADDR + 4:
1104         ret = (uint32_t)(r->test_dma_addr >> 32);
1105         break;
1106     case ROCKER_PORT_PHYS_COUNT:
1107         ret = r->fp_ports;
1108         break;
1109     case ROCKER_PORT_PHYS_LINK_STATUS:
1110         ret = (uint32_t)rocker_port_phys_link_status(r);
1111         break;
1112     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1113         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1114         break;
1115     case ROCKER_PORT_PHYS_ENABLE:
1116         ret = (uint32_t)rocker_port_phys_enable_read(r);
1117         break;
1118     case ROCKER_PORT_PHYS_ENABLE + 4:
1119         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1120         break;
1121     case ROCKER_SWITCH_ID:
1122         ret = (uint32_t)r->switch_id;
1123         break;
1124     case ROCKER_SWITCH_ID + 4:
1125         ret = (uint32_t)(r->switch_id >> 32);
1126         break;
1127     default:
1128         DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1129         ret = 0;
1130         break;
1131     }
1132     return ret;
1133 }
1134 
1135 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1136 {
1137     Rocker *r = opaque;
1138     uint64_t ret;
1139 
1140     if (rocker_addr_is_desc_reg(r, addr)) {
1141         unsigned index = ROCKER_RING_INDEX(addr);
1142         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1143 
1144         switch (addr & ROCKER_DMA_DESC_MASK) {
1145         case ROCKER_DMA_DESC_ADDR_OFFSET:
1146             ret = desc_ring_get_base_addr(r->rings[index]);
1147             break;
1148         default:
1149             DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1150                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1151             ret = 0;
1152             break;
1153         }
1154         return ret;
1155     }
1156 
1157     switch (addr) {
1158     case ROCKER_BOGUS_REG0:
1159     case ROCKER_BOGUS_REG2:
1160         ret = 0xDEADBABEDEADBABEULL;
1161         break;
1162     case ROCKER_TEST_REG64:
1163         ret = r->test_reg64 * 2;
1164         break;
1165     case ROCKER_TEST_DMA_ADDR:
1166         ret = r->test_dma_addr;
1167         break;
1168     case ROCKER_PORT_PHYS_LINK_STATUS:
1169         ret = rocker_port_phys_link_status(r);
1170         break;
1171     case ROCKER_PORT_PHYS_ENABLE:
1172         ret = rocker_port_phys_enable_read(r);
1173         break;
1174     case ROCKER_SWITCH_ID:
1175         ret = r->switch_id;
1176         break;
1177     default:
1178         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1179         ret = 0;
1180         break;
1181     }
1182     return ret;
1183 }
1184 
1185 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1186 {
1187     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1188             rocker_reg_name(opaque, addr), addr, size);
1189 
1190     switch (size) {
1191     case 4:
1192         return rocker_io_readl(opaque, addr);
1193     case 8:
1194         return rocker_io_readq(opaque, addr);
1195     }
1196 
1197     return -1;
1198 }
1199 
1200 static const MemoryRegionOps rocker_mmio_ops = {
1201     .read = rocker_mmio_read,
1202     .write = rocker_mmio_write,
1203     .endianness = DEVICE_LITTLE_ENDIAN,
1204     .valid = {
1205         .min_access_size = 4,
1206         .max_access_size = 8,
1207     },
1208     .impl = {
1209         .min_access_size = 4,
1210         .max_access_size = 8,
1211     },
1212 };
1213 
1214 static void rocker_msix_vectors_unuse(Rocker *r,
1215                                       unsigned int num_vectors)
1216 {
1217     PCIDevice *dev = PCI_DEVICE(r);
1218     int i;
1219 
1220     for (i = 0; i < num_vectors; i++) {
1221         msix_vector_unuse(dev, i);
1222     }
1223 }
1224 
1225 static int rocker_msix_vectors_use(Rocker *r,
1226                                    unsigned int num_vectors)
1227 {
1228     PCIDevice *dev = PCI_DEVICE(r);
1229     int err;
1230     int i;
1231 
1232     for (i = 0; i < num_vectors; i++) {
1233         err = msix_vector_use(dev, i);
1234         if (err) {
1235             goto rollback;
1236         }
1237     }
1238     return 0;
1239 
1240 rollback:
1241     rocker_msix_vectors_unuse(r, i);
1242     return err;
1243 }
1244 
1245 static int rocker_msix_init(Rocker *r, Error **errp)
1246 {
1247     PCIDevice *dev = PCI_DEVICE(r);
1248     int err;
1249 
1250     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1251                     &r->msix_bar,
1252                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1253                     &r->msix_bar,
1254                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1255                     0, errp);
1256     if (err) {
1257         return err;
1258     }
1259 
1260     err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1261     if (err) {
1262         goto err_msix_vectors_use;
1263     }
1264 
1265     return 0;
1266 
1267 err_msix_vectors_use:
1268     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1269     return err;
1270 }
1271 
1272 static void rocker_msix_uninit(Rocker *r)
1273 {
1274     PCIDevice *dev = PCI_DEVICE(r);
1275 
1276     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1277     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1278 }
1279 
1280 static World *rocker_world_type_by_name(Rocker *r, const char *name)
1281 {
1282     int i;
1283 
1284     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1285         if (strcmp(name, world_name(r->worlds[i])) == 0) {
1286             return r->worlds[i];
1287         }
1288     }
1289     return NULL;
1290 }
1291 
1292 static void pci_rocker_realize(PCIDevice *dev, Error **errp)
1293 {
1294     Rocker *r = ROCKER(dev);
1295     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1296     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1297     static int sw_index;
1298     int i, err = 0;
1299 
1300     /* allocate worlds */
1301 
1302     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1303 
1304     if (!r->world_name) {
1305         r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1306     }
1307 
1308     r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1309     if (!r->world_dflt) {
1310         error_setg(errp,
1311                 "invalid argument requested world %s does not exist",
1312                 r->world_name);
1313         goto err_world_type_by_name;
1314     }
1315 
1316     /* set up memory-mapped region at BAR0 */
1317 
1318     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1319                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1320     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1321                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1322 
1323     /* set up memory-mapped region for MSI-X */
1324 
1325     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1326                        ROCKER_PCI_MSIX_BAR_SIZE);
1327     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1328                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1329 
1330     /* MSI-X init */
1331 
1332     err = rocker_msix_init(r, errp);
1333     if (err) {
1334         goto err_msix_init;
1335     }
1336 
1337     /* validate switch properties */
1338 
1339     if (!r->name) {
1340         r->name = g_strdup(TYPE_ROCKER);
1341     }
1342 
1343     if (rocker_find(r->name)) {
1344         error_setg(errp, "%s already exists", r->name);
1345         goto err_duplicate;
1346     }
1347 
1348     /* Rocker name is passed in port name requests to OS with the intention
1349      * that the name is used in interface names. Limit the length of the
1350      * rocker name to avoid naming problems in the OS. Also, adding the
1351      * port number as p# and unganged breakout b#, where # is at most 2
1352      * digits, so leave room for it too (-1 for string terminator, -3 for
1353      * p# and -3 for b#)
1354      */
1355 #define ROCKER_IFNAMSIZ 16
1356 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1357     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1358         error_setg(errp,
1359                 "name too long; please shorten to at most %d chars",
1360                 MAX_ROCKER_NAME_LEN);
1361         goto err_name_too_long;
1362     }
1363 
1364     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1365         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1366         r->fp_start_macaddr.a[4] += (sw_index++);
1367     }
1368 
1369     if (!r->switch_id) {
1370         memcpy(&r->switch_id, &r->fp_start_macaddr,
1371                sizeof(r->fp_start_macaddr));
1372     }
1373 
1374     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1375         r->fp_ports = ROCKER_FP_PORTS_MAX;
1376     }
1377 
1378     r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1379 
1380     /* Rings are ordered like this:
1381      * - command ring
1382      * - event ring
1383      * - port0 tx ring
1384      * - port0 rx ring
1385      * - port1 tx ring
1386      * - port1 rx ring
1387      * .....
1388      */
1389 
1390     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1391         DescRing *ring = desc_ring_alloc(r, i);
1392 
1393         if (i == ROCKER_RING_CMD) {
1394             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1395         } else if (i == ROCKER_RING_EVENT) {
1396             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1397         } else if (i % 2 == 0) {
1398             desc_ring_set_consume(ring, tx_consume,
1399                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1400         } else if (i % 2 == 1) {
1401             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1402         }
1403 
1404         r->rings[i] = ring;
1405     }
1406 
1407     for (i = 0; i < r->fp_ports; i++) {
1408         FpPort *port =
1409             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1410                           i, &r->fp_ports_peers[i]);
1411 
1412         r->fp_port[i] = port;
1413         fp_port_set_world(port, r->world_dflt);
1414     }
1415 
1416     QLIST_INSERT_HEAD(&rockers, r, next);
1417 
1418     return;
1419 
1420 err_name_too_long:
1421 err_duplicate:
1422     rocker_msix_uninit(r);
1423 err_msix_init:
1424     object_unparent(OBJECT(&r->msix_bar));
1425     object_unparent(OBJECT(&r->mmio));
1426 err_world_type_by_name:
1427     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1428         if (r->worlds[i]) {
1429             world_free(r->worlds[i]);
1430         }
1431     }
1432 }
1433 
1434 static void pci_rocker_uninit(PCIDevice *dev)
1435 {
1436     Rocker *r = ROCKER(dev);
1437     int i;
1438 
1439     QLIST_REMOVE(r, next);
1440 
1441     for (i = 0; i < r->fp_ports; i++) {
1442         FpPort *port = r->fp_port[i];
1443 
1444         fp_port_free(port);
1445         r->fp_port[i] = NULL;
1446     }
1447 
1448     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1449         if (r->rings[i]) {
1450             desc_ring_free(r->rings[i]);
1451         }
1452     }
1453     g_free(r->rings);
1454 
1455     rocker_msix_uninit(r);
1456     object_unparent(OBJECT(&r->msix_bar));
1457     object_unparent(OBJECT(&r->mmio));
1458 
1459     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1460         if (r->worlds[i]) {
1461             world_free(r->worlds[i]);
1462         }
1463     }
1464     g_free(r->fp_ports_peers);
1465 }
1466 
1467 static void rocker_reset(DeviceState *dev)
1468 {
1469     Rocker *r = ROCKER(dev);
1470     int i;
1471 
1472     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1473         if (r->worlds[i]) {
1474             world_reset(r->worlds[i]);
1475         }
1476     }
1477     for (i = 0; i < r->fp_ports; i++) {
1478         fp_port_reset(r->fp_port[i]);
1479         fp_port_set_world(r->fp_port[i], r->world_dflt);
1480     }
1481 
1482     r->test_reg = 0;
1483     r->test_reg64 = 0;
1484     r->test_dma_addr = 0;
1485     r->test_dma_size = 0;
1486 
1487     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1488         desc_ring_reset(r->rings[i]);
1489     }
1490 
1491     DPRINTF("Reset done\n");
1492 }
1493 
/*
 * User-configurable device properties:
 *   name             - switch name (string)
 *   world            - name of the world to use as default (string)
 *   fp_start_macaddr - MAC address stored in Rocker.fp_start_macaddr
 *   switch_id        - 64-bit switch id, defaults to 0
 *   ports            - array of netdev peers; the element count is stored
 *                      in fp_ports and the elements in fp_ports_peers
 */
static Property rocker_properties[] = {
    DEFINE_PROP_STRING("name", Rocker, name),
    DEFINE_PROP_STRING("world", Rocker, world_name),
    DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
                        fp_start_macaddr),
    DEFINE_PROP_UINT64("switch_id", Rocker,
                       switch_id, 0),
    DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
                      fp_ports_peers, qdev_prop_netdev, NICPeers),
    DEFINE_PROP_END_OF_LIST(),
};
1505 
1506 static const VMStateDescription rocker_vmsd = {
1507     .name = TYPE_ROCKER,
1508     .unmigratable = 1,
1509 };
1510 
1511 static void rocker_class_init(ObjectClass *klass, void *data)
1512 {
1513     DeviceClass *dc = DEVICE_CLASS(klass);
1514     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1515 
1516     k->realize = pci_rocker_realize;
1517     k->exit = pci_rocker_uninit;
1518     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1519     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1520     k->revision = ROCKER_PCI_REVISION;
1521     k->class_id = PCI_CLASS_NETWORK_OTHER;
1522     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1523     dc->desc = "Rocker Switch";
1524     dc->reset = rocker_reset;
1525     device_class_set_props(dc, rocker_properties);
1526     dc->vmsd = &rocker_vmsd;
1527 }
1528 
1529 static const TypeInfo rocker_info = {
1530     .name          = TYPE_ROCKER,
1531     .parent        = TYPE_PCI_DEVICE,
1532     .instance_size = sizeof(Rocker),
1533     .class_init    = rocker_class_init,
1534     .interfaces = (InterfaceInfo[]) {
1535         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1536         { },
1537     },
1538 };
1539 
/* Register the rocker device type described by rocker_info. */
static void rocker_register_types(void)
{
    type_register_static(&rocker_info);
}

/* Hook the registration function into type_init(). */
type_init(rocker_register_types)
1546