xref: /qemu/hw/net/rocker/rocker.c (revision b25f23e7)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/hw.h"
20 #include "hw/pci/pci.h"
21 #include "hw/pci/msix.h"
22 #include "net/net.h"
23 #include "net/eth.h"
24 #include "qemu/iov.h"
25 #include "qemu/bitops.h"
26 #include "qmp-commands.h"
27 
28 #include "rocker.h"
29 #include "rocker_hw.h"
30 #include "rocker_fp.h"
31 #include "rocker_desc.h"
32 #include "rocker_tlv.h"
33 #include "rocker_world.h"
34 #include "rocker_of_dpa.h"
35 
/*
 * State of one emulated rocker switch device.
 */
struct rocker {
    /* private */
    PCIDevice parent_obj;
    /* public */

    MemoryRegion mmio;
    MemoryRegion msix_bar;

    /* switch configuration */
    char *name;                  /* switch name */
    char *world_name;            /* world name */
    uint32_t fp_ports;           /* front-panel port count */
    NICPeers *fp_ports_peers;
    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
    uint64_t switch_id;          /* switch id */

    /* front-panel ports; the code in this file iterates entries
     * 0 .. fp_ports - 1
     */
    FpPort *fp_port[ROCKER_FP_PORTS_MAX];

    /* register backings */
    uint32_t test_reg;
    uint64_t test_reg64;
    dma_addr_t test_dma_addr;
    uint32_t test_dma_size;
    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */

    /* desc rings, indexed by ring number (see rocker_pci_ring_count()) */
    DescRing **rings;

    /* switch worlds, indexed by enum rocker_world_type */
    World *worlds[ROCKER_WORLD_TYPE_MAX];
    World *world_dflt;

    /* linkage in the file-scope "rockers" list */
    QLIST_ENTRY(rocker) next;
};
71 
/* Type name handed to OBJECT_CHECK() by to_rocker() */
#define ROCKER "rocker"

/* Checked conversion of an object pointer to Rocker * */
#define to_rocker(obj) \
    OBJECT_CHECK(Rocker, (obj), ROCKER)

/* List of rocker instances that rocker_find() walks */
static QLIST_HEAD(, rocker) rockers;
78 
79 Rocker *rocker_find(const char *name)
80 {
81     Rocker *r;
82 
83     QLIST_FOREACH(r, &rockers, next)
84         if (strcmp(r->name, name) == 0) {
85             return r;
86         }
87 
88     return NULL;
89 }
90 
91 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
92 {
93     if (type < ROCKER_WORLD_TYPE_MAX) {
94         return r->worlds[type];
95     }
96     return NULL;
97 }
98 
99 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
100 {
101     RockerSwitch *rocker;
102     Rocker *r;
103 
104     r = rocker_find(name);
105     if (!r) {
106         error_setg(errp, "rocker %s not found", name);
107         return NULL;
108     }
109 
110     rocker = g_new0(RockerSwitch, 1);
111     rocker->name = g_strdup(r->name);
112     rocker->id = r->switch_id;
113     rocker->ports = r->fp_ports;
114 
115     return rocker;
116 }
117 
118 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
119 {
120     RockerPortList *list = NULL;
121     Rocker *r;
122     int i;
123 
124     r = rocker_find(name);
125     if (!r) {
126         error_setg(errp, "rocker %s not found", name);
127         return NULL;
128     }
129 
130     for (i = r->fp_ports - 1; i >= 0; i--) {
131         RockerPortList *info = g_malloc0(sizeof(*info));
132         info->value = g_malloc0(sizeof(*info->value));
133         struct fp_port *port = r->fp_port[i];
134 
135         fp_port_get_info(port, info);
136         info->next = list;
137         list = info;
138     }
139 
140     return list;
141 }
142 
143 uint32_t rocker_fp_ports(Rocker *r)
144 {
145     return r->fp_ports;
146 }
147 
148 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
149                                             DescRing *ring)
150 {
151     return (desc_ring_index(ring) - 2) / 2 + 1;
152 }
153 
154 static int tx_consume(Rocker *r, DescInfo *info)
155 {
156     PCIDevice *dev = PCI_DEVICE(r);
157     char *buf = desc_get_buf(info, true);
158     RockerTlv *tlv_frag;
159     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
160     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
161     uint32_t pport;
162     uint32_t port;
163     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
164     uint16_t tx_l3_csum_off = 0;
165     uint16_t tx_tso_mss = 0;
166     uint16_t tx_tso_hdr_len = 0;
167     int iovcnt = 0;
168     int err = ROCKER_OK;
169     int rem;
170     int i;
171 
172     if (!buf) {
173         return -ROCKER_ENXIO;
174     }
175 
176     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
177 
178     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
179         return -ROCKER_EINVAL;
180     }
181 
182     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
183     if (!fp_port_from_pport(pport, &port)) {
184         return -ROCKER_EINVAL;
185     }
186 
187     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
188         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
189     }
190 
191     switch (tx_offload) {
192     case ROCKER_TX_OFFLOAD_L3_CSUM:
193         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
194             return -ROCKER_EINVAL;
195         }
196         break;
197     case ROCKER_TX_OFFLOAD_TSO:
198         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
199             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
200             return -ROCKER_EINVAL;
201         }
202         break;
203     }
204 
205     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
206         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
207     }
208 
209     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
210         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
211     }
212 
213     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
214         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
215     }
216 
217     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
218         hwaddr frag_addr;
219         uint16_t frag_len;
220 
221         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
222             err = -ROCKER_EINVAL;
223             goto err_bad_attr;
224         }
225 
226         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
227 
228         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
229             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
230             err = -ROCKER_EINVAL;
231             goto err_bad_attr;
232         }
233 
234         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
235         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
236 
237         if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
238             goto err_too_many_frags;
239         }
240         iov[iovcnt].iov_len = frag_len;
241         iov[iovcnt].iov_base = g_malloc(frag_len);
242         if (!iov[iovcnt].iov_base) {
243             err = -ROCKER_ENOMEM;
244             goto err_no_mem;
245         }
246 
247         if (pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
248                      iov[iovcnt].iov_len)) {
249             err = -ROCKER_ENXIO;
250             goto err_bad_io;
251         }
252         iovcnt++;
253     }
254 
255     if (iovcnt) {
256         /* XXX perform Tx offloads */
257         /* XXX   silence compiler for now */
258         tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
259     }
260 
261     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
262 
263 err_too_many_frags:
264 err_bad_io:
265 err_no_mem:
266 err_bad_attr:
267     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
268         g_free(iov[i].iov_base);
269     }
270 
271     return err;
272 }
273 
274 static int cmd_get_port_settings(Rocker *r,
275                                  DescInfo *info, char *buf,
276                                  RockerTlv *cmd_info_tlv)
277 {
278     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
279     RockerTlv *nest;
280     FpPort *fp_port;
281     uint32_t pport;
282     uint32_t port;
283     uint32_t speed;
284     uint8_t duplex;
285     uint8_t autoneg;
286     uint8_t learning;
287     char *phys_name;
288     MACAddr macaddr;
289     enum rocker_world_type mode;
290     size_t tlv_size;
291     int pos;
292     int err;
293 
294     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
295                             cmd_info_tlv);
296 
297     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
298         return -ROCKER_EINVAL;
299     }
300 
301     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
302     if (!fp_port_from_pport(pport, &port)) {
303         return -ROCKER_EINVAL;
304     }
305     fp_port = r->fp_port[port];
306 
307     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
308     if (err) {
309         return err;
310     }
311 
312     fp_port_get_macaddr(fp_port, &macaddr);
313     mode = world_type(fp_port_get_world(fp_port));
314     learning = fp_port_get_learning(fp_port);
315     phys_name = fp_port_get_name(fp_port);
316 
317     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
318                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
319                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
320                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
321                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
322                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
323                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
324                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
325                rocker_tlv_total_size(strlen(phys_name));
326 
327     if (tlv_size > desc_buf_size(info)) {
328         return -ROCKER_EMSGSIZE;
329     }
330 
331     pos = 0;
332     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
333     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
334     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
335     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
336     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
337     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
338                    sizeof(macaddr.a), macaddr.a);
339     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
340     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
341                       learning);
342     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
343                    strlen(phys_name), phys_name);
344     rocker_tlv_nest_end(buf, &pos, nest);
345 
346     return desc_set_buf(info, tlv_size);
347 }
348 
349 static int cmd_set_port_settings(Rocker *r,
350                                  RockerTlv *cmd_info_tlv)
351 {
352     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
353     FpPort *fp_port;
354     uint32_t pport;
355     uint32_t port;
356     uint32_t speed;
357     uint8_t duplex;
358     uint8_t autoneg;
359     uint8_t learning;
360     MACAddr macaddr;
361     enum rocker_world_type mode;
362     int err;
363 
364     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
365                             cmd_info_tlv);
366 
367     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
368         return -ROCKER_EINVAL;
369     }
370 
371     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
372     if (!fp_port_from_pport(pport, &port)) {
373         return -ROCKER_EINVAL;
374     }
375     fp_port = r->fp_port[port];
376 
377     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
378         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
379         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
380 
381         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
382         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
383         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
384 
385         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
386         if (err) {
387             return err;
388         }
389     }
390 
391     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
392         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
393             sizeof(macaddr.a)) {
394             return -ROCKER_EINVAL;
395         }
396         memcpy(macaddr.a,
397                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
398                sizeof(macaddr.a));
399         fp_port_set_macaddr(fp_port, &macaddr);
400     }
401 
402     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
403         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
404         if (mode >= ROCKER_WORLD_TYPE_MAX) {
405             return -ROCKER_EINVAL;
406         }
407         /* We don't support world change. */
408         if (!fp_port_check_world(fp_port, r->worlds[mode])) {
409             return -ROCKER_EINVAL;
410         }
411     }
412 
413     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
414         learning =
415             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
416         fp_port_set_learning(fp_port, learning);
417     }
418 
419     return ROCKER_OK;
420 }
421 
422 static int cmd_consume(Rocker *r, DescInfo *info)
423 {
424     char *buf = desc_get_buf(info, false);
425     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
426     RockerTlv *info_tlv;
427     World *world;
428     uint16_t cmd;
429     int err;
430 
431     if (!buf) {
432         return -ROCKER_ENXIO;
433     }
434 
435     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
436 
437     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
438         return -ROCKER_EINVAL;
439     }
440 
441     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
442     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
443 
444     /* This might be reworked to something like this:
445      * Every world will have an array of command handlers from
446      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
447      * up to each world to implement whatever command it want.
448      * It can reference "generic" commands as cmd_set_port_settings or
449      * cmd_get_port_settings
450      */
451 
452     switch (cmd) {
453     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
454     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
455     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
456     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
457     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
458     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
459     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
460     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
461         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
462         err = world_do_cmd(world, info, buf, cmd, info_tlv);
463         break;
464     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
465         err = cmd_get_port_settings(r, info, buf, info_tlv);
466         break;
467     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
468         err = cmd_set_port_settings(r, info_tlv);
469         break;
470     default:
471         err = -ROCKER_EINVAL;
472         break;
473     }
474 
475     return err;
476 }
477 
478 static void rocker_msix_irq(Rocker *r, unsigned vector)
479 {
480     PCIDevice *dev = PCI_DEVICE(r);
481 
482     DPRINTF("MSI-X notify request for vector %d\n", vector);
483     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
484         DPRINTF("incorrect vector %d\n", vector);
485         return;
486     }
487     msix_notify(dev, vector);
488 }
489 
490 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
491 {
492     DescRing *ring = r->rings[ROCKER_RING_EVENT];
493     DescInfo *info = desc_ring_fetch_desc(ring);
494     RockerTlv *nest;
495     char *buf;
496     size_t tlv_size;
497     int pos;
498     int err;
499 
500     if (!info) {
501         return -ROCKER_ENOBUFS;
502     }
503 
504     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
505                rocker_tlv_total_size(0) +                 /* nest */
506                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
507                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
508 
509     if (tlv_size > desc_buf_size(info)) {
510         err = -ROCKER_EMSGSIZE;
511         goto err_too_big;
512     }
513 
514     buf = desc_get_buf(info, false);
515     if (!buf) {
516         err = -ROCKER_ENOMEM;
517         goto err_no_mem;
518     }
519 
520     pos = 0;
521     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
522                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
523     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
524     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
525     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
526                       link_up ? 1 : 0);
527     rocker_tlv_nest_end(buf, &pos, nest);
528 
529     err = desc_set_buf(info, tlv_size);
530 
531 err_too_big:
532 err_no_mem:
533     if (desc_ring_post_desc(ring, err)) {
534         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
535     }
536 
537     return err;
538 }
539 
540 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
541                                uint16_t vlan_id)
542 {
543     DescRing *ring = r->rings[ROCKER_RING_EVENT];
544     DescInfo *info;
545     FpPort *fp_port;
546     uint32_t port;
547     RockerTlv *nest;
548     char *buf;
549     size_t tlv_size;
550     int pos;
551     int err;
552 
553     if (!fp_port_from_pport(pport, &port)) {
554         return -ROCKER_EINVAL;
555     }
556     fp_port = r->fp_port[port];
557     if (!fp_port_get_learning(fp_port)) {
558         return ROCKER_OK;
559     }
560 
561     info = desc_ring_fetch_desc(ring);
562     if (!info) {
563         return -ROCKER_ENOBUFS;
564     }
565 
566     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
567                rocker_tlv_total_size(0) +                 /* nest */
568                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
569                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
570                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
571 
572     if (tlv_size > desc_buf_size(info)) {
573         err = -ROCKER_EMSGSIZE;
574         goto err_too_big;
575     }
576 
577     buf = desc_get_buf(info, false);
578     if (!buf) {
579         err = -ROCKER_ENOMEM;
580         goto err_no_mem;
581     }
582 
583     pos = 0;
584     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
585                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
586     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
587     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
588     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
589     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
590     rocker_tlv_nest_end(buf, &pos, nest);
591 
592     err = desc_set_buf(info, tlv_size);
593 
594 err_too_big:
595 err_no_mem:
596     if (desc_ring_post_desc(ring, err)) {
597         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
598     }
599 
600     return err;
601 }
602 
603 static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
604                                                      uint32_t pport)
605 {
606     return r->rings[(pport - 1) * 2 + 3];
607 }
608 
609 int rx_produce(World *world, uint32_t pport,
610                const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
611 {
612     Rocker *r = world_rocker(world);
613     PCIDevice *dev = (PCIDevice *)r;
614     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
615     DescInfo *info = desc_ring_fetch_desc(ring);
616     char *data;
617     size_t data_size = iov_size(iov, iovcnt);
618     char *buf;
619     uint16_t rx_flags = 0;
620     uint16_t rx_csum = 0;
621     size_t tlv_size;
622     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
623     hwaddr frag_addr;
624     uint16_t frag_max_len;
625     int pos;
626     int err;
627 
628     if (!info) {
629         return -ROCKER_ENOBUFS;
630     }
631 
632     buf = desc_get_buf(info, false);
633     if (!buf) {
634         err = -ROCKER_ENXIO;
635         goto out;
636     }
637     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
638 
639     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
640         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
641         err = -ROCKER_EINVAL;
642         goto out;
643     }
644 
645     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
646     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
647 
648     if (data_size > frag_max_len) {
649         err = -ROCKER_EMSGSIZE;
650         goto out;
651     }
652 
653     if (copy_to_cpu) {
654         rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
655     }
656 
657     /* XXX calc rx flags/csum */
658 
659     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
660                rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
661                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
662                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
663                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
664 
665     if (tlv_size > desc_buf_size(info)) {
666         err = -ROCKER_EMSGSIZE;
667         goto out;
668     }
669 
670     /* TODO:
671      * iov dma write can be optimized in similar way e1000 does it in
672      * e1000_receive_iov. But maybe if would make sense to introduce
673      * generic helper iov_dma_write.
674      */
675 
676     data = g_malloc(data_size);
677     if (!data) {
678         err = -ROCKER_ENOMEM;
679         goto out;
680     }
681     iov_to_buf(iov, iovcnt, 0, data, data_size);
682     pci_dma_write(dev, frag_addr, data, data_size);
683     g_free(data);
684 
685     pos = 0;
686     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
687     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
688     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
689     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
690     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
691 
692     err = desc_set_buf(info, tlv_size);
693 
694 out:
695     if (desc_ring_post_desc(ring, err)) {
696         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
697     }
698 
699     return err;
700 }
701 
702 int rocker_port_eg(Rocker *r, uint32_t pport,
703                    const struct iovec *iov, int iovcnt)
704 {
705     FpPort *fp_port;
706     uint32_t port;
707 
708     if (!fp_port_from_pport(pport, &port)) {
709         return -ROCKER_EINVAL;
710     }
711 
712     fp_port = r->fp_port[port];
713 
714     return fp_port_eg(fp_port, iov, iovcnt);
715 }
716 
717 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
718 {
719     PCIDevice *dev = PCI_DEVICE(r);
720     char *buf;
721     int i;
722 
723     buf = g_malloc(r->test_dma_size);
724 
725     if (!buf) {
726         DPRINTF("test dma buffer alloc failed");
727         return;
728     }
729 
730     switch (val) {
731     case ROCKER_TEST_DMA_CTRL_CLEAR:
732         memset(buf, 0, r->test_dma_size);
733         break;
734     case ROCKER_TEST_DMA_CTRL_FILL:
735         memset(buf, 0x96, r->test_dma_size);
736         break;
737     case ROCKER_TEST_DMA_CTRL_INVERT:
738         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
739         for (i = 0; i < r->test_dma_size; i++) {
740             buf[i] = ~buf[i];
741         }
742         break;
743     default:
744         DPRINTF("not test dma control val=0x%08x\n", val);
745         goto err_out;
746     }
747     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
748 
749     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
750 
751 err_out:
752     g_free(buf);
753 }
754 
755 static void rocker_reset(DeviceState *dev);
756 
757 static void rocker_control(Rocker *r, uint32_t val)
758 {
759     if (val & ROCKER_CONTROL_RESET) {
760         rocker_reset(DEVICE(r));
761     }
762 }
763 
764 static int rocker_pci_ring_count(Rocker *r)
765 {
766     /* There are:
767      * - command ring
768      * - event ring
769      * - tx and rx ring per each port
770      */
771     return 2 + (2 * r->fp_ports);
772 }
773 
774 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
775 {
776     hwaddr start = ROCKER_DMA_DESC_BASE;
777     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
778 
779     return addr >= start && addr < end;
780 }
781 
782 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
783 {
784     int i;
785     bool old_enabled;
786     bool new_enabled;
787     FpPort *fp_port;
788 
789     for (i = 0; i < r->fp_ports; i++) {
790         fp_port = r->fp_port[i];
791         old_enabled = fp_port_enabled(fp_port);
792         new_enabled = (new >> (i + 1)) & 0x1;
793         if (new_enabled == old_enabled) {
794             continue;
795         }
796         if (new_enabled) {
797             fp_port_enable(r->fp_port[i]);
798         } else {
799             fp_port_disable(r->fp_port[i]);
800         }
801     }
802 }
803 
804 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
805 {
806     Rocker *r = opaque;
807 
808     if (rocker_addr_is_desc_reg(r, addr)) {
809         unsigned index = ROCKER_RING_INDEX(addr);
810         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
811 
812         switch (offset) {
813         case ROCKER_DMA_DESC_ADDR_OFFSET:
814             r->lower32 = (uint64_t)val;
815             break;
816         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
817             desc_ring_set_base_addr(r->rings[index],
818                                     ((uint64_t)val) << 32 | r->lower32);
819             r->lower32 = 0;
820             break;
821         case ROCKER_DMA_DESC_SIZE_OFFSET:
822             desc_ring_set_size(r->rings[index], val);
823             break;
824         case ROCKER_DMA_DESC_HEAD_OFFSET:
825             if (desc_ring_set_head(r->rings[index], val)) {
826                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
827             }
828             break;
829         case ROCKER_DMA_DESC_CTRL_OFFSET:
830             desc_ring_set_ctrl(r->rings[index], val);
831             break;
832         case ROCKER_DMA_DESC_CREDITS_OFFSET:
833             if (desc_ring_ret_credits(r->rings[index], val)) {
834                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
835             }
836             break;
837         default:
838             DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
839                     " val=0x%08x (ring %d, addr=0x%02x)\n",
840                     addr, val, index, offset);
841             break;
842         }
843         return;
844     }
845 
846     switch (addr) {
847     case ROCKER_TEST_REG:
848         r->test_reg = val;
849         break;
850     case ROCKER_TEST_REG64:
851     case ROCKER_TEST_DMA_ADDR:
852     case ROCKER_PORT_PHYS_ENABLE:
853         r->lower32 = (uint64_t)val;
854         break;
855     case ROCKER_TEST_REG64 + 4:
856         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
857         r->lower32 = 0;
858         break;
859     case ROCKER_TEST_IRQ:
860         rocker_msix_irq(r, val);
861         break;
862     case ROCKER_TEST_DMA_SIZE:
863         r->test_dma_size = val & 0xFFFF;
864         break;
865     case ROCKER_TEST_DMA_ADDR + 4:
866         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
867         r->lower32 = 0;
868         break;
869     case ROCKER_TEST_DMA_CTRL:
870         rocker_test_dma_ctrl(r, val);
871         break;
872     case ROCKER_CONTROL:
873         rocker_control(r, val);
874         break;
875     case ROCKER_PORT_PHYS_ENABLE + 4:
876         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
877         r->lower32 = 0;
878         break;
879     default:
880         DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
881                 " val=0x%08x\n", addr, val);
882         break;
883     }
884 }
885 
886 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
887 {
888     Rocker *r = opaque;
889 
890     if (rocker_addr_is_desc_reg(r, addr)) {
891         unsigned index = ROCKER_RING_INDEX(addr);
892         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
893 
894         switch (offset) {
895         case ROCKER_DMA_DESC_ADDR_OFFSET:
896             desc_ring_set_base_addr(r->rings[index], val);
897             break;
898         default:
899             DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
900                     " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
901                     addr, val, index, offset);
902             break;
903         }
904         return;
905     }
906 
907     switch (addr) {
908     case ROCKER_TEST_REG64:
909         r->test_reg64 = val;
910         break;
911     case ROCKER_TEST_DMA_ADDR:
912         r->test_dma_addr = val;
913         break;
914     case ROCKER_PORT_PHYS_ENABLE:
915         rocker_port_phys_enable_write(r, val);
916         break;
917     default:
918         DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
919                 " val=0x" TARGET_FMT_plx "\n", addr, val);
920         break;
921     }
922 }
923 
924 #ifdef DEBUG_ROCKER
925 #define regname(reg) case (reg): return #reg
926 static const char *rocker_reg_name(void *opaque, hwaddr addr)
927 {
928     Rocker *r = opaque;
929 
930     if (rocker_addr_is_desc_reg(r, addr)) {
931         unsigned index = ROCKER_RING_INDEX(addr);
932         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
933         static char buf[100];
934         char ring_name[10];
935 
936         switch (index) {
937         case 0:
938             sprintf(ring_name, "cmd");
939             break;
940         case 1:
941             sprintf(ring_name, "event");
942             break;
943         default:
944             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
945                     (index - 2) / 2);
946         }
947 
948         switch (offset) {
949         case ROCKER_DMA_DESC_ADDR_OFFSET:
950             sprintf(buf, "Ring[%s] ADDR", ring_name);
951             return buf;
952         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
953             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
954             return buf;
955         case ROCKER_DMA_DESC_SIZE_OFFSET:
956             sprintf(buf, "Ring[%s] SIZE", ring_name);
957             return buf;
958         case ROCKER_DMA_DESC_HEAD_OFFSET:
959             sprintf(buf, "Ring[%s] HEAD", ring_name);
960             return buf;
961         case ROCKER_DMA_DESC_TAIL_OFFSET:
962             sprintf(buf, "Ring[%s] TAIL", ring_name);
963             return buf;
964         case ROCKER_DMA_DESC_CTRL_OFFSET:
965             sprintf(buf, "Ring[%s] CTRL", ring_name);
966             return buf;
967         case ROCKER_DMA_DESC_CREDITS_OFFSET:
968             sprintf(buf, "Ring[%s] CREDITS", ring_name);
969             return buf;
970         default:
971             sprintf(buf, "Ring[%s] ???", ring_name);
972             return buf;
973         }
974     } else {
975         switch (addr) {
976             regname(ROCKER_BOGUS_REG0);
977             regname(ROCKER_BOGUS_REG1);
978             regname(ROCKER_BOGUS_REG2);
979             regname(ROCKER_BOGUS_REG3);
980             regname(ROCKER_TEST_REG);
981             regname(ROCKER_TEST_REG64);
982             regname(ROCKER_TEST_REG64+4);
983             regname(ROCKER_TEST_IRQ);
984             regname(ROCKER_TEST_DMA_ADDR);
985             regname(ROCKER_TEST_DMA_ADDR+4);
986             regname(ROCKER_TEST_DMA_SIZE);
987             regname(ROCKER_TEST_DMA_CTRL);
988             regname(ROCKER_CONTROL);
989             regname(ROCKER_PORT_PHYS_COUNT);
990             regname(ROCKER_PORT_PHYS_LINK_STATUS);
991             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
992             regname(ROCKER_PORT_PHYS_ENABLE);
993             regname(ROCKER_PORT_PHYS_ENABLE+4);
994             regname(ROCKER_SWITCH_ID);
995             regname(ROCKER_SWITCH_ID+4);
996         }
997     }
998     return "???";
999 }
1000 #else
1001 static const char *rocker_reg_name(void *opaque, hwaddr addr)
1002 {
1003     return NULL;
1004 }
1005 #endif
1006 
1007 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1008                               unsigned size)
1009 {
1010     DPRINTF("Write %s addr " TARGET_FMT_plx
1011             ", size %u, val " TARGET_FMT_plx "\n",
1012             rocker_reg_name(opaque, addr), addr, size, val);
1013 
1014     switch (size) {
1015     case 4:
1016         rocker_io_writel(opaque, addr, val);
1017         break;
1018     case 8:
1019         rocker_io_writeq(opaque, addr, val);
1020         break;
1021     }
1022 }
1023 
1024 static uint64_t rocker_port_phys_link_status(Rocker *r)
1025 {
1026     int i;
1027     uint64_t status = 0;
1028 
1029     for (i = 0; i < r->fp_ports; i++) {
1030         FpPort *port = r->fp_port[i];
1031 
1032         if (fp_port_get_link_up(port)) {
1033             status |= 1 << (i + 1);
1034         }
1035     }
1036     return status;
1037 }
1038 
1039 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1040 {
1041     int i;
1042     uint64_t ret = 0;
1043 
1044     for (i = 0; i < r->fp_ports; i++) {
1045         FpPort *port = r->fp_port[i];
1046 
1047         if (fp_port_enabled(port)) {
1048             ret |= 1 << (i + 1);
1049         }
1050     }
1051     return ret;
1052 }
1053 
1054 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1055 {
1056     Rocker *r = opaque;
1057     uint32_t ret;
1058 
1059     if (rocker_addr_is_desc_reg(r, addr)) {
1060         unsigned index = ROCKER_RING_INDEX(addr);
1061         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1062 
1063         switch (offset) {
1064         case ROCKER_DMA_DESC_ADDR_OFFSET:
1065             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1066             break;
1067         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1068             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1069             break;
1070         case ROCKER_DMA_DESC_SIZE_OFFSET:
1071             ret = desc_ring_get_size(r->rings[index]);
1072             break;
1073         case ROCKER_DMA_DESC_HEAD_OFFSET:
1074             ret = desc_ring_get_head(r->rings[index]);
1075             break;
1076         case ROCKER_DMA_DESC_TAIL_OFFSET:
1077             ret = desc_ring_get_tail(r->rings[index]);
1078             break;
1079         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1080             ret = desc_ring_get_credits(r->rings[index]);
1081             break;
1082         default:
1083             DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1084                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1085             ret = 0;
1086             break;
1087         }
1088         return ret;
1089     }
1090 
1091     switch (addr) {
1092     case ROCKER_BOGUS_REG0:
1093     case ROCKER_BOGUS_REG1:
1094     case ROCKER_BOGUS_REG2:
1095     case ROCKER_BOGUS_REG3:
1096         ret = 0xDEADBABE;
1097         break;
1098     case ROCKER_TEST_REG:
1099         ret = r->test_reg * 2;
1100         break;
1101     case ROCKER_TEST_REG64:
1102         ret = (uint32_t)(r->test_reg64 * 2);
1103         break;
1104     case ROCKER_TEST_REG64 + 4:
1105         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1106         break;
1107     case ROCKER_TEST_DMA_SIZE:
1108         ret = r->test_dma_size;
1109         break;
1110     case ROCKER_TEST_DMA_ADDR:
1111         ret = (uint32_t)r->test_dma_addr;
1112         break;
1113     case ROCKER_TEST_DMA_ADDR + 4:
1114         ret = (uint32_t)(r->test_dma_addr >> 32);
1115         break;
1116     case ROCKER_PORT_PHYS_COUNT:
1117         ret = r->fp_ports;
1118         break;
1119     case ROCKER_PORT_PHYS_LINK_STATUS:
1120         ret = (uint32_t)rocker_port_phys_link_status(r);
1121         break;
1122     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1123         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1124         break;
1125     case ROCKER_PORT_PHYS_ENABLE:
1126         ret = (uint32_t)rocker_port_phys_enable_read(r);
1127         break;
1128     case ROCKER_PORT_PHYS_ENABLE + 4:
1129         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1130         break;
1131     case ROCKER_SWITCH_ID:
1132         ret = (uint32_t)r->switch_id;
1133         break;
1134     case ROCKER_SWITCH_ID + 4:
1135         ret = (uint32_t)(r->switch_id >> 32);
1136         break;
1137     default:
1138         DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1139         ret = 0;
1140         break;
1141     }
1142     return ret;
1143 }
1144 
1145 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1146 {
1147     Rocker *r = opaque;
1148     uint64_t ret;
1149 
1150     if (rocker_addr_is_desc_reg(r, addr)) {
1151         unsigned index = ROCKER_RING_INDEX(addr);
1152         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1153 
1154         switch (addr & ROCKER_DMA_DESC_MASK) {
1155         case ROCKER_DMA_DESC_ADDR_OFFSET:
1156             ret = desc_ring_get_base_addr(r->rings[index]);
1157             break;
1158         default:
1159             DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1160                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1161             ret = 0;
1162             break;
1163         }
1164         return ret;
1165     }
1166 
1167     switch (addr) {
1168     case ROCKER_BOGUS_REG0:
1169     case ROCKER_BOGUS_REG2:
1170         ret = 0xDEADBABEDEADBABEULL;
1171         break;
1172     case ROCKER_TEST_REG64:
1173         ret = r->test_reg64 * 2;
1174         break;
1175     case ROCKER_TEST_DMA_ADDR:
1176         ret = r->test_dma_addr;
1177         break;
1178     case ROCKER_PORT_PHYS_LINK_STATUS:
1179         ret = rocker_port_phys_link_status(r);
1180         break;
1181     case ROCKER_PORT_PHYS_ENABLE:
1182         ret = rocker_port_phys_enable_read(r);
1183         break;
1184     case ROCKER_SWITCH_ID:
1185         ret = r->switch_id;
1186         break;
1187     default:
1188         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1189         ret = 0;
1190         break;
1191     }
1192     return ret;
1193 }
1194 
1195 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1196 {
1197     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1198             rocker_reg_name(opaque, addr), addr, size);
1199 
1200     switch (size) {
1201     case 4:
1202         return rocker_io_readl(opaque, addr);
1203     case 8:
1204         return rocker_io_readq(opaque, addr);
1205     }
1206 
1207     return -1;
1208 }
1209 
1210 static const MemoryRegionOps rocker_mmio_ops = {
1211     .read = rocker_mmio_read,
1212     .write = rocker_mmio_write,
1213     .endianness = DEVICE_LITTLE_ENDIAN,
1214     .valid = {
1215         .min_access_size = 4,
1216         .max_access_size = 8,
1217     },
1218     .impl = {
1219         .min_access_size = 4,
1220         .max_access_size = 8,
1221     },
1222 };
1223 
1224 static void rocker_msix_vectors_unuse(Rocker *r,
1225                                       unsigned int num_vectors)
1226 {
1227     PCIDevice *dev = PCI_DEVICE(r);
1228     int i;
1229 
1230     for (i = 0; i < num_vectors; i++) {
1231         msix_vector_unuse(dev, i);
1232     }
1233 }
1234 
1235 static int rocker_msix_vectors_use(Rocker *r,
1236                                    unsigned int num_vectors)
1237 {
1238     PCIDevice *dev = PCI_DEVICE(r);
1239     int err;
1240     int i;
1241 
1242     for (i = 0; i < num_vectors; i++) {
1243         err = msix_vector_use(dev, i);
1244         if (err) {
1245             goto rollback;
1246         }
1247     }
1248     return 0;
1249 
1250 rollback:
1251     rocker_msix_vectors_unuse(r, i);
1252     return err;
1253 }
1254 
1255 static int rocker_msix_init(Rocker *r)
1256 {
1257     PCIDevice *dev = PCI_DEVICE(r);
1258     int err;
1259     Error *local_err = NULL;
1260 
1261     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1262                     &r->msix_bar,
1263                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1264                     &r->msix_bar,
1265                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1266                     0, &local_err);
1267     if (err) {
1268         error_report_err(local_err);
1269         return err;
1270     }
1271 
1272     err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1273     if (err) {
1274         goto err_msix_vectors_use;
1275     }
1276 
1277     return 0;
1278 
1279 err_msix_vectors_use:
1280     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1281     return err;
1282 }
1283 
1284 static void rocker_msix_uninit(Rocker *r)
1285 {
1286     PCIDevice *dev = PCI_DEVICE(r);
1287 
1288     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1289     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1290 }
1291 
1292 static World *rocker_world_type_by_name(Rocker *r, const char *name)
1293 {
1294     int i;
1295 
1296     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1297         if (strcmp(name, world_name(r->worlds[i])) == 0) {
1298             return r->worlds[i];
1299 	}
1300     }
1301     return NULL;
1302 }
1303 
1304 static int pci_rocker_init(PCIDevice *dev)
1305 {
1306     Rocker *r = to_rocker(dev);
1307     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1308     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1309     static int sw_index;
1310     int i, err = 0;
1311 
1312     /* allocate worlds */
1313 
1314     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1315 
1316     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1317         if (!r->worlds[i]) {
1318             err = -ENOMEM;
1319             goto err_world_alloc;
1320         }
1321     }
1322 
1323     if (!r->world_name) {
1324         r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1325     }
1326 
1327     r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1328     if (!r->world_dflt) {
1329         fprintf(stderr,
1330                 "rocker: requested world \"%s\" does not exist\n",
1331                 r->world_name);
1332         err = -EINVAL;
1333         goto err_world_type_by_name;
1334     }
1335 
1336     /* set up memory-mapped region at BAR0 */
1337 
1338     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1339                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1340     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1341                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1342 
1343     /* set up memory-mapped region for MSI-X */
1344 
1345     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1346                        ROCKER_PCI_MSIX_BAR_SIZE);
1347     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1348                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1349 
1350     /* MSI-X init */
1351 
1352     err = rocker_msix_init(r);
1353     if (err) {
1354         goto err_msix_init;
1355     }
1356 
1357     /* validate switch properties */
1358 
1359     if (!r->name) {
1360         r->name = g_strdup(ROCKER);
1361     }
1362 
1363     if (rocker_find(r->name)) {
1364         err = -EEXIST;
1365         goto err_duplicate;
1366     }
1367 
1368     /* Rocker name is passed in port name requests to OS with the intention
1369      * that the name is used in interface names. Limit the length of the
1370      * rocker name to avoid naming problems in the OS. Also, adding the
1371      * port number as p# and unganged breakout b#, where # is at most 2
1372      * digits, so leave room for it too (-1 for string terminator, -3 for
1373      * p# and -3 for b#)
1374      */
1375 #define ROCKER_IFNAMSIZ 16
1376 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1377     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1378         fprintf(stderr,
1379                 "rocker: name too long; please shorten to at most %d chars\n",
1380                 MAX_ROCKER_NAME_LEN);
1381         return -EINVAL;
1382     }
1383 
1384     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1385         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1386         r->fp_start_macaddr.a[4] += (sw_index++);
1387     }
1388 
1389     if (!r->switch_id) {
1390         memcpy(&r->switch_id, &r->fp_start_macaddr,
1391                sizeof(r->fp_start_macaddr));
1392     }
1393 
1394     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1395         r->fp_ports = ROCKER_FP_PORTS_MAX;
1396     }
1397 
1398     r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1399     if (!r->rings) {
1400         goto err_rings_alloc;
1401     }
1402 
1403     /* Rings are ordered like this:
1404      * - command ring
1405      * - event ring
1406      * - port0 tx ring
1407      * - port0 rx ring
1408      * - port1 tx ring
1409      * - port1 rx ring
1410      * .....
1411      */
1412 
1413     err = -ENOMEM;
1414     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1415         DescRing *ring = desc_ring_alloc(r, i);
1416 
1417         if (!ring) {
1418             goto err_ring_alloc;
1419         }
1420 
1421         if (i == ROCKER_RING_CMD) {
1422             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1423         } else if (i == ROCKER_RING_EVENT) {
1424             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1425         } else if (i % 2 == 0) {
1426             desc_ring_set_consume(ring, tx_consume,
1427                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1428         } else if (i % 2 == 1) {
1429             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1430         }
1431 
1432         r->rings[i] = ring;
1433     }
1434 
1435     for (i = 0; i < r->fp_ports; i++) {
1436         FpPort *port =
1437             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1438                           i, &r->fp_ports_peers[i]);
1439 
1440         if (!port) {
1441             goto err_port_alloc;
1442         }
1443 
1444         r->fp_port[i] = port;
1445         fp_port_set_world(port, r->world_dflt);
1446     }
1447 
1448     QLIST_INSERT_HEAD(&rockers, r, next);
1449 
1450     return 0;
1451 
1452 err_port_alloc:
1453     for (--i; i >= 0; i--) {
1454         FpPort *port = r->fp_port[i];
1455         fp_port_free(port);
1456     }
1457     i = rocker_pci_ring_count(r);
1458 err_ring_alloc:
1459     for (--i; i >= 0; i--) {
1460         desc_ring_free(r->rings[i]);
1461     }
1462     g_free(r->rings);
1463 err_rings_alloc:
1464 err_duplicate:
1465     rocker_msix_uninit(r);
1466 err_msix_init:
1467     object_unparent(OBJECT(&r->msix_bar));
1468     object_unparent(OBJECT(&r->mmio));
1469 err_world_type_by_name:
1470 err_world_alloc:
1471     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1472         if (r->worlds[i]) {
1473             world_free(r->worlds[i]);
1474         }
1475     }
1476     return err;
1477 }
1478 
1479 static void pci_rocker_uninit(PCIDevice *dev)
1480 {
1481     Rocker *r = to_rocker(dev);
1482     int i;
1483 
1484     QLIST_REMOVE(r, next);
1485 
1486     for (i = 0; i < r->fp_ports; i++) {
1487         FpPort *port = r->fp_port[i];
1488 
1489         fp_port_free(port);
1490         r->fp_port[i] = NULL;
1491     }
1492 
1493     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1494         if (r->rings[i]) {
1495             desc_ring_free(r->rings[i]);
1496         }
1497     }
1498     g_free(r->rings);
1499 
1500     rocker_msix_uninit(r);
1501     object_unparent(OBJECT(&r->msix_bar));
1502     object_unparent(OBJECT(&r->mmio));
1503 
1504     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1505         if (r->worlds[i]) {
1506             world_free(r->worlds[i]);
1507         }
1508     }
1509     g_free(r->fp_ports_peers);
1510 }
1511 
1512 static void rocker_reset(DeviceState *dev)
1513 {
1514     Rocker *r = to_rocker(dev);
1515     int i;
1516 
1517     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1518         if (r->worlds[i]) {
1519             world_reset(r->worlds[i]);
1520         }
1521     }
1522     for (i = 0; i < r->fp_ports; i++) {
1523         fp_port_reset(r->fp_port[i]);
1524         fp_port_set_world(r->fp_port[i], r->world_dflt);
1525     }
1526 
1527     r->test_reg = 0;
1528     r->test_reg64 = 0;
1529     r->test_dma_addr = 0;
1530     r->test_dma_size = 0;
1531 
1532     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1533         desc_ring_reset(r->rings[i]);
1534     }
1535 
1536     DPRINTF("Reset done\n");
1537 }
1538 
1539 static Property rocker_properties[] = {
1540     DEFINE_PROP_STRING("name", Rocker, name),
1541     DEFINE_PROP_STRING("world", Rocker, world_name),
1542     DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1543                         fp_start_macaddr),
1544     DEFINE_PROP_UINT64("switch_id", Rocker,
1545                        switch_id, 0),
1546     DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1547                       fp_ports_peers, qdev_prop_netdev, NICPeers),
1548     DEFINE_PROP_END_OF_LIST(),
1549 };
1550 
1551 static const VMStateDescription rocker_vmsd = {
1552     .name = ROCKER,
1553     .unmigratable = 1,
1554 };
1555 
1556 static void rocker_class_init(ObjectClass *klass, void *data)
1557 {
1558     DeviceClass *dc = DEVICE_CLASS(klass);
1559     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1560 
1561     k->init = pci_rocker_init;
1562     k->exit = pci_rocker_uninit;
1563     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1564     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1565     k->revision = ROCKER_PCI_REVISION;
1566     k->class_id = PCI_CLASS_NETWORK_OTHER;
1567     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1568     dc->desc = "Rocker Switch";
1569     dc->reset = rocker_reset;
1570     dc->props = rocker_properties;
1571     dc->vmsd = &rocker_vmsd;
1572 }
1573 
1574 static const TypeInfo rocker_info = {
1575     .name          = ROCKER,
1576     .parent        = TYPE_PCI_DEVICE,
1577     .instance_size = sizeof(Rocker),
1578     .class_init    = rocker_class_init,
1579 };
1580 
1581 static void rocker_register_types(void)
1582 {
1583     type_register_static(&rocker_info);
1584 }
1585 
1586 type_init(rocker_register_types)
1587