xref: /qemu/hw/net/rocker/rocker.c (revision 6f0dd6c5)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/pci/pci.h"
20 #include "hw/qdev-properties.h"
21 #include "migration/vmstate.h"
22 #include "hw/pci/msix.h"
23 #include "net/net.h"
24 #include "net/eth.h"
25 #include "qapi/error.h"
26 #include "qapi/qapi-commands-rocker.h"
27 #include "qemu/iov.h"
28 #include "qemu/module.h"
29 #include "qemu/bitops.h"
30 
31 #include "rocker.h"
32 #include "rocker_hw.h"
33 #include "rocker_fp.h"
34 #include "rocker_desc.h"
35 #include "rocker_tlv.h"
36 #include "rocker_world.h"
37 #include "rocker_of_dpa.h"
38 
/*
 * State of one emulated rocker switch.
 *
 * The structure embeds a PCIDevice as its first member and additionally
 * carries the switch configuration, the front-panel ports, the backing
 * storage for the test registers, the descriptor rings and the switch
 * worlds.
 */
struct rocker {
    /* private */
    PCIDevice parent_obj;
    /* public */

    MemoryRegion mmio;
    MemoryRegion msix_bar;

    /* switch configuration */
    char *name;                  /* switch name */
    char *world_name;            /* world name */
    uint32_t fp_ports;           /* front-panel port count */
    NICPeers *fp_ports_peers;
    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
    uint64_t switch_id;          /* switch id */

    /* front-panel ports */
    /* code in this file only walks the first fp_ports entries */
    FpPort *fp_port[ROCKER_FP_PORTS_MAX];

    /* register backings */
    uint32_t test_reg;
    uint64_t test_reg64;
    dma_addr_t test_dma_addr;
    uint32_t test_dma_size;
    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */

    /* desc rings */
    /* indexed by ring number, e.g. ROCKER_RING_EVENT */
    DescRing **rings;

    /* switch worlds */
    /* indexed by enum rocker_world_type, see rocker_get_world() */
    World *worlds[ROCKER_WORLD_TYPE_MAX];
    World *world_dflt;

    /* linkage in the file-scope "rockers" list */
    QLIST_ENTRY(rocker) next;
};
74 
/* Type name of the rocker device; used by the ROCKER() cast below. */
#define TYPE_ROCKER "rocker"

/* Cast an object pointer to Rocker via OBJECT_CHECK against TYPE_ROCKER. */
#define ROCKER(obj) \
    OBJECT_CHECK(Rocker, (obj), TYPE_ROCKER)

/* List of rocker instances; rocker_find() searches it by switch name. */
static QLIST_HEAD(, rocker) rockers;
81 
82 Rocker *rocker_find(const char *name)
83 {
84     Rocker *r;
85 
86     QLIST_FOREACH(r, &rockers, next)
87         if (strcmp(r->name, name) == 0) {
88             return r;
89         }
90 
91     return NULL;
92 }
93 
94 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
95 {
96     if (type < ROCKER_WORLD_TYPE_MAX) {
97         return r->worlds[type];
98     }
99     return NULL;
100 }
101 
102 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
103 {
104     RockerSwitch *rocker;
105     Rocker *r;
106 
107     r = rocker_find(name);
108     if (!r) {
109         error_setg(errp, "rocker %s not found", name);
110         return NULL;
111     }
112 
113     rocker = g_new0(RockerSwitch, 1);
114     rocker->name = g_strdup(r->name);
115     rocker->id = r->switch_id;
116     rocker->ports = r->fp_ports;
117 
118     return rocker;
119 }
120 
121 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
122 {
123     RockerPortList *list = NULL;
124     Rocker *r;
125     int i;
126 
127     r = rocker_find(name);
128     if (!r) {
129         error_setg(errp, "rocker %s not found", name);
130         return NULL;
131     }
132 
133     for (i = r->fp_ports - 1; i >= 0; i--) {
134         RockerPortList *info = g_malloc0(sizeof(*info));
135         info->value = g_malloc0(sizeof(*info->value));
136         struct fp_port *port = r->fp_port[i];
137 
138         fp_port_get_info(port, info);
139         info->next = list;
140         list = info;
141     }
142 
143     return list;
144 }
145 
146 uint32_t rocker_fp_ports(Rocker *r)
147 {
148     return r->fp_ports;
149 }
150 
151 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
152                                             DescRing *ring)
153 {
154     return (desc_ring_index(ring) - 2) / 2 + 1;
155 }
156 
157 static int tx_consume(Rocker *r, DescInfo *info)
158 {
159     PCIDevice *dev = PCI_DEVICE(r);
160     char *buf = desc_get_buf(info, true);
161     RockerTlv *tlv_frag;
162     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
163     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
164     uint32_t pport;
165     uint32_t port;
166     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
167     uint16_t tx_l3_csum_off = 0;
168     uint16_t tx_tso_mss = 0;
169     uint16_t tx_tso_hdr_len = 0;
170     int iovcnt = 0;
171     int err = ROCKER_OK;
172     int rem;
173     int i;
174 
175     if (!buf) {
176         return -ROCKER_ENXIO;
177     }
178 
179     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
180 
181     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
182         return -ROCKER_EINVAL;
183     }
184 
185     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
186     if (!fp_port_from_pport(pport, &port)) {
187         return -ROCKER_EINVAL;
188     }
189 
190     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
191         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
192     }
193 
194     switch (tx_offload) {
195     case ROCKER_TX_OFFLOAD_L3_CSUM:
196         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
197             return -ROCKER_EINVAL;
198         }
199         break;
200     case ROCKER_TX_OFFLOAD_TSO:
201         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
202             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
203             return -ROCKER_EINVAL;
204         }
205         break;
206     }
207 
208     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
209         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
210     }
211 
212     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
213         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
214     }
215 
216     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
217         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
218     }
219 
220     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
221         hwaddr frag_addr;
222         uint16_t frag_len;
223 
224         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
225             err = -ROCKER_EINVAL;
226             goto err_bad_attr;
227         }
228 
229         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
230 
231         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
232             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
233             err = -ROCKER_EINVAL;
234             goto err_bad_attr;
235         }
236 
237         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
238         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
239 
240         if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
241             goto err_too_many_frags;
242         }
243         iov[iovcnt].iov_len = frag_len;
244         iov[iovcnt].iov_base = g_malloc(frag_len);
245 
246         pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
247                      iov[iovcnt].iov_len);
248 
249         iovcnt++;
250     }
251 
252     if (iovcnt) {
253         /* XXX perform Tx offloads */
254         /* XXX   silence compiler for now */
255         tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
256     }
257 
258     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
259 
260 err_too_many_frags:
261 err_bad_attr:
262     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
263         g_free(iov[i].iov_base);
264     }
265 
266     return err;
267 }
268 
269 static int cmd_get_port_settings(Rocker *r,
270                                  DescInfo *info, char *buf,
271                                  RockerTlv *cmd_info_tlv)
272 {
273     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
274     RockerTlv *nest;
275     FpPort *fp_port;
276     uint32_t pport;
277     uint32_t port;
278     uint32_t speed;
279     uint8_t duplex;
280     uint8_t autoneg;
281     uint8_t learning;
282     char *phys_name;
283     MACAddr macaddr;
284     enum rocker_world_type mode;
285     size_t tlv_size;
286     int pos;
287     int err;
288 
289     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
290                             cmd_info_tlv);
291 
292     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
293         return -ROCKER_EINVAL;
294     }
295 
296     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
297     if (!fp_port_from_pport(pport, &port)) {
298         return -ROCKER_EINVAL;
299     }
300     fp_port = r->fp_port[port];
301 
302     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
303     if (err) {
304         return err;
305     }
306 
307     fp_port_get_macaddr(fp_port, &macaddr);
308     mode = world_type(fp_port_get_world(fp_port));
309     learning = fp_port_get_learning(fp_port);
310     phys_name = fp_port_get_name(fp_port);
311 
312     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
313                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
314                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
315                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
316                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
317                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
318                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
319                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
320                rocker_tlv_total_size(strlen(phys_name));
321 
322     if (tlv_size > desc_buf_size(info)) {
323         return -ROCKER_EMSGSIZE;
324     }
325 
326     pos = 0;
327     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
328     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
329     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
330     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
331     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
332     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
333                    sizeof(macaddr.a), macaddr.a);
334     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
335     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
336                       learning);
337     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
338                    strlen(phys_name), phys_name);
339     rocker_tlv_nest_end(buf, &pos, nest);
340 
341     return desc_set_buf(info, tlv_size);
342 }
343 
344 static int cmd_set_port_settings(Rocker *r,
345                                  RockerTlv *cmd_info_tlv)
346 {
347     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
348     FpPort *fp_port;
349     uint32_t pport;
350     uint32_t port;
351     uint32_t speed;
352     uint8_t duplex;
353     uint8_t autoneg;
354     uint8_t learning;
355     MACAddr macaddr;
356     enum rocker_world_type mode;
357     int err;
358 
359     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
360                             cmd_info_tlv);
361 
362     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
363         return -ROCKER_EINVAL;
364     }
365 
366     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
367     if (!fp_port_from_pport(pport, &port)) {
368         return -ROCKER_EINVAL;
369     }
370     fp_port = r->fp_port[port];
371 
372     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
373         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
374         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
375 
376         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
377         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
378         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
379 
380         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
381         if (err) {
382             return err;
383         }
384     }
385 
386     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
387         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
388             sizeof(macaddr.a)) {
389             return -ROCKER_EINVAL;
390         }
391         memcpy(macaddr.a,
392                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
393                sizeof(macaddr.a));
394         fp_port_set_macaddr(fp_port, &macaddr);
395     }
396 
397     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
398         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
399         if (mode >= ROCKER_WORLD_TYPE_MAX) {
400             return -ROCKER_EINVAL;
401         }
402         /* We don't support world change. */
403         if (!fp_port_check_world(fp_port, r->worlds[mode])) {
404             return -ROCKER_EINVAL;
405         }
406     }
407 
408     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
409         learning =
410             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
411         fp_port_set_learning(fp_port, learning);
412     }
413 
414     return ROCKER_OK;
415 }
416 
417 static int cmd_consume(Rocker *r, DescInfo *info)
418 {
419     char *buf = desc_get_buf(info, false);
420     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
421     RockerTlv *info_tlv;
422     World *world;
423     uint16_t cmd;
424     int err;
425 
426     if (!buf) {
427         return -ROCKER_ENXIO;
428     }
429 
430     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
431 
432     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
433         return -ROCKER_EINVAL;
434     }
435 
436     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
437     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
438 
439     /* This might be reworked to something like this:
440      * Every world will have an array of command handlers from
441      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
442      * up to each world to implement whatever command it want.
443      * It can reference "generic" commands as cmd_set_port_settings or
444      * cmd_get_port_settings
445      */
446 
447     switch (cmd) {
448     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
449     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
450     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
451     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
452     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
453     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
454     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
455     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
456         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
457         err = world_do_cmd(world, info, buf, cmd, info_tlv);
458         break;
459     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
460         err = cmd_get_port_settings(r, info, buf, info_tlv);
461         break;
462     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
463         err = cmd_set_port_settings(r, info_tlv);
464         break;
465     default:
466         err = -ROCKER_EINVAL;
467         break;
468     }
469 
470     return err;
471 }
472 
473 static void rocker_msix_irq(Rocker *r, unsigned vector)
474 {
475     PCIDevice *dev = PCI_DEVICE(r);
476 
477     DPRINTF("MSI-X notify request for vector %d\n", vector);
478     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
479         DPRINTF("incorrect vector %d\n", vector);
480         return;
481     }
482     msix_notify(dev, vector);
483 }
484 
485 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
486 {
487     DescRing *ring = r->rings[ROCKER_RING_EVENT];
488     DescInfo *info = desc_ring_fetch_desc(ring);
489     RockerTlv *nest;
490     char *buf;
491     size_t tlv_size;
492     int pos;
493     int err;
494 
495     if (!info) {
496         return -ROCKER_ENOBUFS;
497     }
498 
499     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
500                rocker_tlv_total_size(0) +                 /* nest */
501                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
502                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
503 
504     if (tlv_size > desc_buf_size(info)) {
505         err = -ROCKER_EMSGSIZE;
506         goto err_too_big;
507     }
508 
509     buf = desc_get_buf(info, false);
510     if (!buf) {
511         err = -ROCKER_ENOMEM;
512         goto err_no_mem;
513     }
514 
515     pos = 0;
516     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
517                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
518     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
519     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
520     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
521                       link_up ? 1 : 0);
522     rocker_tlv_nest_end(buf, &pos, nest);
523 
524     err = desc_set_buf(info, tlv_size);
525 
526 err_too_big:
527 err_no_mem:
528     if (desc_ring_post_desc(ring, err)) {
529         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
530     }
531 
532     return err;
533 }
534 
535 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
536                                uint16_t vlan_id)
537 {
538     DescRing *ring = r->rings[ROCKER_RING_EVENT];
539     DescInfo *info;
540     FpPort *fp_port;
541     uint32_t port;
542     RockerTlv *nest;
543     char *buf;
544     size_t tlv_size;
545     int pos;
546     int err;
547 
548     if (!fp_port_from_pport(pport, &port)) {
549         return -ROCKER_EINVAL;
550     }
551     fp_port = r->fp_port[port];
552     if (!fp_port_get_learning(fp_port)) {
553         return ROCKER_OK;
554     }
555 
556     info = desc_ring_fetch_desc(ring);
557     if (!info) {
558         return -ROCKER_ENOBUFS;
559     }
560 
561     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
562                rocker_tlv_total_size(0) +                 /* nest */
563                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
564                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
565                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
566 
567     if (tlv_size > desc_buf_size(info)) {
568         err = -ROCKER_EMSGSIZE;
569         goto err_too_big;
570     }
571 
572     buf = desc_get_buf(info, false);
573     if (!buf) {
574         err = -ROCKER_ENOMEM;
575         goto err_no_mem;
576     }
577 
578     pos = 0;
579     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
580                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
581     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
582     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
583     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
584     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
585     rocker_tlv_nest_end(buf, &pos, nest);
586 
587     err = desc_set_buf(info, tlv_size);
588 
589 err_too_big:
590 err_no_mem:
591     if (desc_ring_post_desc(ring, err)) {
592         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
593     }
594 
595     return err;
596 }
597 
598 static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
599                                                      uint32_t pport)
600 {
601     return r->rings[(pport - 1) * 2 + 3];
602 }
603 
604 int rx_produce(World *world, uint32_t pport,
605                const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
606 {
607     Rocker *r = world_rocker(world);
608     PCIDevice *dev = (PCIDevice *)r;
609     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
610     DescInfo *info = desc_ring_fetch_desc(ring);
611     char *data;
612     size_t data_size = iov_size(iov, iovcnt);
613     char *buf;
614     uint16_t rx_flags = 0;
615     uint16_t rx_csum = 0;
616     size_t tlv_size;
617     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
618     hwaddr frag_addr;
619     uint16_t frag_max_len;
620     int pos;
621     int err;
622 
623     if (!info) {
624         return -ROCKER_ENOBUFS;
625     }
626 
627     buf = desc_get_buf(info, false);
628     if (!buf) {
629         err = -ROCKER_ENXIO;
630         goto out;
631     }
632     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
633 
634     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
635         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
636         err = -ROCKER_EINVAL;
637         goto out;
638     }
639 
640     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
641     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
642 
643     if (data_size > frag_max_len) {
644         err = -ROCKER_EMSGSIZE;
645         goto out;
646     }
647 
648     if (copy_to_cpu) {
649         rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
650     }
651 
652     /* XXX calc rx flags/csum */
653 
654     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
655                rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
656                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
657                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
658                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
659 
660     if (tlv_size > desc_buf_size(info)) {
661         err = -ROCKER_EMSGSIZE;
662         goto out;
663     }
664 
665     /* TODO:
666      * iov dma write can be optimized in similar way e1000 does it in
667      * e1000_receive_iov. But maybe if would make sense to introduce
668      * generic helper iov_dma_write.
669      */
670 
671     data = g_malloc(data_size);
672 
673     iov_to_buf(iov, iovcnt, 0, data, data_size);
674     pci_dma_write(dev, frag_addr, data, data_size);
675     g_free(data);
676 
677     pos = 0;
678     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
679     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
680     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
681     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
682     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
683 
684     err = desc_set_buf(info, tlv_size);
685 
686 out:
687     if (desc_ring_post_desc(ring, err)) {
688         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
689     }
690 
691     return err;
692 }
693 
694 int rocker_port_eg(Rocker *r, uint32_t pport,
695                    const struct iovec *iov, int iovcnt)
696 {
697     FpPort *fp_port;
698     uint32_t port;
699 
700     if (!fp_port_from_pport(pport, &port)) {
701         return -ROCKER_EINVAL;
702     }
703 
704     fp_port = r->fp_port[port];
705 
706     return fp_port_eg(fp_port, iov, iovcnt);
707 }
708 
709 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
710 {
711     PCIDevice *dev = PCI_DEVICE(r);
712     char *buf;
713     int i;
714 
715     buf = g_malloc(r->test_dma_size);
716 
717     switch (val) {
718     case ROCKER_TEST_DMA_CTRL_CLEAR:
719         memset(buf, 0, r->test_dma_size);
720         break;
721     case ROCKER_TEST_DMA_CTRL_FILL:
722         memset(buf, 0x96, r->test_dma_size);
723         break;
724     case ROCKER_TEST_DMA_CTRL_INVERT:
725         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
726         for (i = 0; i < r->test_dma_size; i++) {
727             buf[i] = ~buf[i];
728         }
729         break;
730     default:
731         DPRINTF("not test dma control val=0x%08x\n", val);
732         goto err_out;
733     }
734     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
735 
736     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
737 
738 err_out:
739     g_free(buf);
740 }
741 
742 static void rocker_reset(DeviceState *dev);
743 
744 static void rocker_control(Rocker *r, uint32_t val)
745 {
746     if (val & ROCKER_CONTROL_RESET) {
747         rocker_reset(DEVICE(r));
748     }
749 }
750 
751 static int rocker_pci_ring_count(Rocker *r)
752 {
753     /* There are:
754      * - command ring
755      * - event ring
756      * - tx and rx ring per each port
757      */
758     return 2 + (2 * r->fp_ports);
759 }
760 
761 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
762 {
763     hwaddr start = ROCKER_DMA_DESC_BASE;
764     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
765 
766     return addr >= start && addr < end;
767 }
768 
769 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
770 {
771     int i;
772     bool old_enabled;
773     bool new_enabled;
774     FpPort *fp_port;
775 
776     for (i = 0; i < r->fp_ports; i++) {
777         fp_port = r->fp_port[i];
778         old_enabled = fp_port_enabled(fp_port);
779         new_enabled = (new >> (i + 1)) & 0x1;
780         if (new_enabled == old_enabled) {
781             continue;
782         }
783         if (new_enabled) {
784             fp_port_enable(r->fp_port[i]);
785         } else {
786             fp_port_disable(r->fp_port[i]);
787         }
788     }
789 }
790 
791 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
792 {
793     Rocker *r = opaque;
794 
795     if (rocker_addr_is_desc_reg(r, addr)) {
796         unsigned index = ROCKER_RING_INDEX(addr);
797         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
798 
799         switch (offset) {
800         case ROCKER_DMA_DESC_ADDR_OFFSET:
801             r->lower32 = (uint64_t)val;
802             break;
803         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
804             desc_ring_set_base_addr(r->rings[index],
805                                     ((uint64_t)val) << 32 | r->lower32);
806             r->lower32 = 0;
807             break;
808         case ROCKER_DMA_DESC_SIZE_OFFSET:
809             desc_ring_set_size(r->rings[index], val);
810             break;
811         case ROCKER_DMA_DESC_HEAD_OFFSET:
812             if (desc_ring_set_head(r->rings[index], val)) {
813                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
814             }
815             break;
816         case ROCKER_DMA_DESC_CTRL_OFFSET:
817             desc_ring_set_ctrl(r->rings[index], val);
818             break;
819         case ROCKER_DMA_DESC_CREDITS_OFFSET:
820             if (desc_ring_ret_credits(r->rings[index], val)) {
821                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
822             }
823             break;
824         default:
825             DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
826                     " val=0x%08x (ring %d, addr=0x%02x)\n",
827                     addr, val, index, offset);
828             break;
829         }
830         return;
831     }
832 
833     switch (addr) {
834     case ROCKER_TEST_REG:
835         r->test_reg = val;
836         break;
837     case ROCKER_TEST_REG64:
838     case ROCKER_TEST_DMA_ADDR:
839     case ROCKER_PORT_PHYS_ENABLE:
840         r->lower32 = (uint64_t)val;
841         break;
842     case ROCKER_TEST_REG64 + 4:
843         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
844         r->lower32 = 0;
845         break;
846     case ROCKER_TEST_IRQ:
847         rocker_msix_irq(r, val);
848         break;
849     case ROCKER_TEST_DMA_SIZE:
850         r->test_dma_size = val & 0xFFFF;
851         break;
852     case ROCKER_TEST_DMA_ADDR + 4:
853         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
854         r->lower32 = 0;
855         break;
856     case ROCKER_TEST_DMA_CTRL:
857         rocker_test_dma_ctrl(r, val);
858         break;
859     case ROCKER_CONTROL:
860         rocker_control(r, val);
861         break;
862     case ROCKER_PORT_PHYS_ENABLE + 4:
863         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
864         r->lower32 = 0;
865         break;
866     default:
867         DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
868                 " val=0x%08x\n", addr, val);
869         break;
870     }
871 }
872 
873 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
874 {
875     Rocker *r = opaque;
876 
877     if (rocker_addr_is_desc_reg(r, addr)) {
878         unsigned index = ROCKER_RING_INDEX(addr);
879         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
880 
881         switch (offset) {
882         case ROCKER_DMA_DESC_ADDR_OFFSET:
883             desc_ring_set_base_addr(r->rings[index], val);
884             break;
885         default:
886             DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
887                     " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
888                     addr, val, index, offset);
889             break;
890         }
891         return;
892     }
893 
894     switch (addr) {
895     case ROCKER_TEST_REG64:
896         r->test_reg64 = val;
897         break;
898     case ROCKER_TEST_DMA_ADDR:
899         r->test_dma_addr = val;
900         break;
901     case ROCKER_PORT_PHYS_ENABLE:
902         rocker_port_phys_enable_write(r, val);
903         break;
904     default:
905         DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
906                 " val=0x" TARGET_FMT_plx "\n", addr, val);
907         break;
908     }
909 }
910 
911 #ifdef DEBUG_ROCKER
912 #define regname(reg) case (reg): return #reg
913 static const char *rocker_reg_name(void *opaque, hwaddr addr)
914 {
915     Rocker *r = opaque;
916 
917     if (rocker_addr_is_desc_reg(r, addr)) {
918         unsigned index = ROCKER_RING_INDEX(addr);
919         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
920         static char buf[100];
921         char ring_name[10];
922 
923         switch (index) {
924         case 0:
925             sprintf(ring_name, "cmd");
926             break;
927         case 1:
928             sprintf(ring_name, "event");
929             break;
930         default:
931             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
932                     (index - 2) / 2);
933         }
934 
935         switch (offset) {
936         case ROCKER_DMA_DESC_ADDR_OFFSET:
937             sprintf(buf, "Ring[%s] ADDR", ring_name);
938             return buf;
939         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
940             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
941             return buf;
942         case ROCKER_DMA_DESC_SIZE_OFFSET:
943             sprintf(buf, "Ring[%s] SIZE", ring_name);
944             return buf;
945         case ROCKER_DMA_DESC_HEAD_OFFSET:
946             sprintf(buf, "Ring[%s] HEAD", ring_name);
947             return buf;
948         case ROCKER_DMA_DESC_TAIL_OFFSET:
949             sprintf(buf, "Ring[%s] TAIL", ring_name);
950             return buf;
951         case ROCKER_DMA_DESC_CTRL_OFFSET:
952             sprintf(buf, "Ring[%s] CTRL", ring_name);
953             return buf;
954         case ROCKER_DMA_DESC_CREDITS_OFFSET:
955             sprintf(buf, "Ring[%s] CREDITS", ring_name);
956             return buf;
957         default:
958             sprintf(buf, "Ring[%s] ???", ring_name);
959             return buf;
960         }
961     } else {
962         switch (addr) {
963             regname(ROCKER_BOGUS_REG0);
964             regname(ROCKER_BOGUS_REG1);
965             regname(ROCKER_BOGUS_REG2);
966             regname(ROCKER_BOGUS_REG3);
967             regname(ROCKER_TEST_REG);
968             regname(ROCKER_TEST_REG64);
969             regname(ROCKER_TEST_REG64+4);
970             regname(ROCKER_TEST_IRQ);
971             regname(ROCKER_TEST_DMA_ADDR);
972             regname(ROCKER_TEST_DMA_ADDR+4);
973             regname(ROCKER_TEST_DMA_SIZE);
974             regname(ROCKER_TEST_DMA_CTRL);
975             regname(ROCKER_CONTROL);
976             regname(ROCKER_PORT_PHYS_COUNT);
977             regname(ROCKER_PORT_PHYS_LINK_STATUS);
978             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
979             regname(ROCKER_PORT_PHYS_ENABLE);
980             regname(ROCKER_PORT_PHYS_ENABLE+4);
981             regname(ROCKER_SWITCH_ID);
982             regname(ROCKER_SWITCH_ID+4);
983         }
984     }
985     return "???";
986 }
987 #else
/*
 * Fallback register-name lookup, compiled in the #else branch of the
 * conditional that guards the full implementation above.  It does no
 * decoding: both arguments are ignored and NULL is always returned.
 */
static const char *rocker_reg_name(void *opaque, hwaddr addr)
{
    return NULL;
}
992 #endif
993 
994 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
995                               unsigned size)
996 {
997     DPRINTF("Write %s addr " TARGET_FMT_plx
998             ", size %u, val " TARGET_FMT_plx "\n",
999             rocker_reg_name(opaque, addr), addr, size, val);
1000 
1001     switch (size) {
1002     case 4:
1003         rocker_io_writel(opaque, addr, val);
1004         break;
1005     case 8:
1006         rocker_io_writeq(opaque, addr, val);
1007         break;
1008     }
1009 }
1010 
1011 static uint64_t rocker_port_phys_link_status(Rocker *r)
1012 {
1013     int i;
1014     uint64_t status = 0;
1015 
1016     for (i = 0; i < r->fp_ports; i++) {
1017         FpPort *port = r->fp_port[i];
1018 
1019         if (fp_port_get_link_up(port)) {
1020             status |= 1 << (i + 1);
1021         }
1022     }
1023     return status;
1024 }
1025 
1026 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1027 {
1028     int i;
1029     uint64_t ret = 0;
1030 
1031     for (i = 0; i < r->fp_ports; i++) {
1032         FpPort *port = r->fp_port[i];
1033 
1034         if (fp_port_enabled(port)) {
1035             ret |= 1 << (i + 1);
1036         }
1037     }
1038     return ret;
1039 }
1040 
1041 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1042 {
1043     Rocker *r = opaque;
1044     uint32_t ret;
1045 
1046     if (rocker_addr_is_desc_reg(r, addr)) {
1047         unsigned index = ROCKER_RING_INDEX(addr);
1048         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1049 
1050         switch (offset) {
1051         case ROCKER_DMA_DESC_ADDR_OFFSET:
1052             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1053             break;
1054         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1055             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1056             break;
1057         case ROCKER_DMA_DESC_SIZE_OFFSET:
1058             ret = desc_ring_get_size(r->rings[index]);
1059             break;
1060         case ROCKER_DMA_DESC_HEAD_OFFSET:
1061             ret = desc_ring_get_head(r->rings[index]);
1062             break;
1063         case ROCKER_DMA_DESC_TAIL_OFFSET:
1064             ret = desc_ring_get_tail(r->rings[index]);
1065             break;
1066         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1067             ret = desc_ring_get_credits(r->rings[index]);
1068             break;
1069         default:
1070             DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1071                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1072             ret = 0;
1073             break;
1074         }
1075         return ret;
1076     }
1077 
1078     switch (addr) {
1079     case ROCKER_BOGUS_REG0:
1080     case ROCKER_BOGUS_REG1:
1081     case ROCKER_BOGUS_REG2:
1082     case ROCKER_BOGUS_REG3:
1083         ret = 0xDEADBABE;
1084         break;
1085     case ROCKER_TEST_REG:
1086         ret = r->test_reg * 2;
1087         break;
1088     case ROCKER_TEST_REG64:
1089         ret = (uint32_t)(r->test_reg64 * 2);
1090         break;
1091     case ROCKER_TEST_REG64 + 4:
1092         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1093         break;
1094     case ROCKER_TEST_DMA_SIZE:
1095         ret = r->test_dma_size;
1096         break;
1097     case ROCKER_TEST_DMA_ADDR:
1098         ret = (uint32_t)r->test_dma_addr;
1099         break;
1100     case ROCKER_TEST_DMA_ADDR + 4:
1101         ret = (uint32_t)(r->test_dma_addr >> 32);
1102         break;
1103     case ROCKER_PORT_PHYS_COUNT:
1104         ret = r->fp_ports;
1105         break;
1106     case ROCKER_PORT_PHYS_LINK_STATUS:
1107         ret = (uint32_t)rocker_port_phys_link_status(r);
1108         break;
1109     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1110         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1111         break;
1112     case ROCKER_PORT_PHYS_ENABLE:
1113         ret = (uint32_t)rocker_port_phys_enable_read(r);
1114         break;
1115     case ROCKER_PORT_PHYS_ENABLE + 4:
1116         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1117         break;
1118     case ROCKER_SWITCH_ID:
1119         ret = (uint32_t)r->switch_id;
1120         break;
1121     case ROCKER_SWITCH_ID + 4:
1122         ret = (uint32_t)(r->switch_id >> 32);
1123         break;
1124     default:
1125         DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1126         ret = 0;
1127         break;
1128     }
1129     return ret;
1130 }
1131 
1132 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1133 {
1134     Rocker *r = opaque;
1135     uint64_t ret;
1136 
1137     if (rocker_addr_is_desc_reg(r, addr)) {
1138         unsigned index = ROCKER_RING_INDEX(addr);
1139         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1140 
1141         switch (addr & ROCKER_DMA_DESC_MASK) {
1142         case ROCKER_DMA_DESC_ADDR_OFFSET:
1143             ret = desc_ring_get_base_addr(r->rings[index]);
1144             break;
1145         default:
1146             DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1147                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1148             ret = 0;
1149             break;
1150         }
1151         return ret;
1152     }
1153 
1154     switch (addr) {
1155     case ROCKER_BOGUS_REG0:
1156     case ROCKER_BOGUS_REG2:
1157         ret = 0xDEADBABEDEADBABEULL;
1158         break;
1159     case ROCKER_TEST_REG64:
1160         ret = r->test_reg64 * 2;
1161         break;
1162     case ROCKER_TEST_DMA_ADDR:
1163         ret = r->test_dma_addr;
1164         break;
1165     case ROCKER_PORT_PHYS_LINK_STATUS:
1166         ret = rocker_port_phys_link_status(r);
1167         break;
1168     case ROCKER_PORT_PHYS_ENABLE:
1169         ret = rocker_port_phys_enable_read(r);
1170         break;
1171     case ROCKER_SWITCH_ID:
1172         ret = r->switch_id;
1173         break;
1174     default:
1175         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1176         ret = 0;
1177         break;
1178     }
1179     return ret;
1180 }
1181 
1182 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1183 {
1184     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1185             rocker_reg_name(opaque, addr), addr, size);
1186 
1187     switch (size) {
1188     case 4:
1189         return rocker_io_readl(opaque, addr);
1190     case 8:
1191         return rocker_io_readq(opaque, addr);
1192     }
1193 
1194     return -1;
1195 }
1196 
/*
 * Access callbacks for the rocker register region (bound to the
 * "rocker-mmio" region in pci_rocker_realize()).  Registers are
 * little-endian and are accessed as 4- or 8-byte quantities, which are the
 * only widths rocker_mmio_read()/rocker_mmio_write() dispatch on.
 */
static const MemoryRegionOps rocker_mmio_ops = {
    .read = rocker_mmio_read,
    .write = rocker_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    /* range of access sizes declared valid for this region */
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
    /* range of access sizes declared as implemented by the callbacks */
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
};
1210 
1211 static void rocker_msix_vectors_unuse(Rocker *r,
1212                                       unsigned int num_vectors)
1213 {
1214     PCIDevice *dev = PCI_DEVICE(r);
1215     int i;
1216 
1217     for (i = 0; i < num_vectors; i++) {
1218         msix_vector_unuse(dev, i);
1219     }
1220 }
1221 
1222 static int rocker_msix_vectors_use(Rocker *r,
1223                                    unsigned int num_vectors)
1224 {
1225     PCIDevice *dev = PCI_DEVICE(r);
1226     int err;
1227     int i;
1228 
1229     for (i = 0; i < num_vectors; i++) {
1230         err = msix_vector_use(dev, i);
1231         if (err) {
1232             goto rollback;
1233         }
1234     }
1235     return 0;
1236 
1237 rollback:
1238     rocker_msix_vectors_unuse(r, i);
1239     return err;
1240 }
1241 
1242 static int rocker_msix_init(Rocker *r, Error **errp)
1243 {
1244     PCIDevice *dev = PCI_DEVICE(r);
1245     int err;
1246 
1247     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1248                     &r->msix_bar,
1249                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1250                     &r->msix_bar,
1251                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1252                     0, errp);
1253     if (err) {
1254         return err;
1255     }
1256 
1257     err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1258     if (err) {
1259         goto err_msix_vectors_use;
1260     }
1261 
1262     return 0;
1263 
1264 err_msix_vectors_use:
1265     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1266     return err;
1267 }
1268 
1269 static void rocker_msix_uninit(Rocker *r)
1270 {
1271     PCIDevice *dev = PCI_DEVICE(r);
1272 
1273     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1274     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1275 }
1276 
1277 static World *rocker_world_type_by_name(Rocker *r, const char *name)
1278 {
1279     int i;
1280 
1281     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1282         if (strcmp(name, world_name(r->worlds[i])) == 0) {
1283             return r->worlds[i];
1284         }
1285     }
1286     return NULL;
1287 }
1288 
1289 static void pci_rocker_realize(PCIDevice *dev, Error **errp)
1290 {
1291     Rocker *r = ROCKER(dev);
1292     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1293     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1294     static int sw_index;
1295     int i, err = 0;
1296 
1297     /* allocate worlds */
1298 
1299     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1300 
1301     if (!r->world_name) {
1302         r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1303     }
1304 
1305     r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1306     if (!r->world_dflt) {
1307         error_setg(errp,
1308                 "invalid argument requested world %s does not exist",
1309                 r->world_name);
1310         goto err_world_type_by_name;
1311     }
1312 
1313     /* set up memory-mapped region at BAR0 */
1314 
1315     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1316                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1317     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1318                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1319 
1320     /* set up memory-mapped region for MSI-X */
1321 
1322     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1323                        ROCKER_PCI_MSIX_BAR_SIZE);
1324     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1325                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1326 
1327     /* MSI-X init */
1328 
1329     err = rocker_msix_init(r, errp);
1330     if (err) {
1331         goto err_msix_init;
1332     }
1333 
1334     /* validate switch properties */
1335 
1336     if (!r->name) {
1337         r->name = g_strdup(TYPE_ROCKER);
1338     }
1339 
1340     if (rocker_find(r->name)) {
1341         error_setg(errp, "%s already exists", r->name);
1342         goto err_duplicate;
1343     }
1344 
1345     /* Rocker name is passed in port name requests to OS with the intention
1346      * that the name is used in interface names. Limit the length of the
1347      * rocker name to avoid naming problems in the OS. Also, adding the
1348      * port number as p# and unganged breakout b#, where # is at most 2
1349      * digits, so leave room for it too (-1 for string terminator, -3 for
1350      * p# and -3 for b#)
1351      */
1352 #define ROCKER_IFNAMSIZ 16
1353 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1354     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1355         error_setg(errp,
1356                 "name too long; please shorten to at most %d chars",
1357                 MAX_ROCKER_NAME_LEN);
1358         goto err_name_too_long;
1359     }
1360 
1361     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1362         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1363         r->fp_start_macaddr.a[4] += (sw_index++);
1364     }
1365 
1366     if (!r->switch_id) {
1367         memcpy(&r->switch_id, &r->fp_start_macaddr,
1368                sizeof(r->fp_start_macaddr));
1369     }
1370 
1371     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1372         r->fp_ports = ROCKER_FP_PORTS_MAX;
1373     }
1374 
1375     r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1376 
1377     /* Rings are ordered like this:
1378      * - command ring
1379      * - event ring
1380      * - port0 tx ring
1381      * - port0 rx ring
1382      * - port1 tx ring
1383      * - port1 rx ring
1384      * .....
1385      */
1386 
1387     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1388         DescRing *ring = desc_ring_alloc(r, i);
1389 
1390         if (i == ROCKER_RING_CMD) {
1391             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1392         } else if (i == ROCKER_RING_EVENT) {
1393             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1394         } else if (i % 2 == 0) {
1395             desc_ring_set_consume(ring, tx_consume,
1396                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1397         } else if (i % 2 == 1) {
1398             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1399         }
1400 
1401         r->rings[i] = ring;
1402     }
1403 
1404     for (i = 0; i < r->fp_ports; i++) {
1405         FpPort *port =
1406             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1407                           i, &r->fp_ports_peers[i]);
1408 
1409         r->fp_port[i] = port;
1410         fp_port_set_world(port, r->world_dflt);
1411     }
1412 
1413     QLIST_INSERT_HEAD(&rockers, r, next);
1414 
1415     return;
1416 
1417 err_name_too_long:
1418 err_duplicate:
1419     rocker_msix_uninit(r);
1420 err_msix_init:
1421     object_unparent(OBJECT(&r->msix_bar));
1422     object_unparent(OBJECT(&r->mmio));
1423 err_world_type_by_name:
1424     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1425         if (r->worlds[i]) {
1426             world_free(r->worlds[i]);
1427         }
1428     }
1429 }
1430 
1431 static void pci_rocker_uninit(PCIDevice *dev)
1432 {
1433     Rocker *r = ROCKER(dev);
1434     int i;
1435 
1436     QLIST_REMOVE(r, next);
1437 
1438     for (i = 0; i < r->fp_ports; i++) {
1439         FpPort *port = r->fp_port[i];
1440 
1441         fp_port_free(port);
1442         r->fp_port[i] = NULL;
1443     }
1444 
1445     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1446         if (r->rings[i]) {
1447             desc_ring_free(r->rings[i]);
1448         }
1449     }
1450     g_free(r->rings);
1451 
1452     rocker_msix_uninit(r);
1453     object_unparent(OBJECT(&r->msix_bar));
1454     object_unparent(OBJECT(&r->mmio));
1455 
1456     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1457         if (r->worlds[i]) {
1458             world_free(r->worlds[i]);
1459         }
1460     }
1461     g_free(r->fp_ports_peers);
1462 }
1463 
1464 static void rocker_reset(DeviceState *dev)
1465 {
1466     Rocker *r = ROCKER(dev);
1467     int i;
1468 
1469     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1470         if (r->worlds[i]) {
1471             world_reset(r->worlds[i]);
1472         }
1473     }
1474     for (i = 0; i < r->fp_ports; i++) {
1475         fp_port_reset(r->fp_port[i]);
1476         fp_port_set_world(r->fp_port[i], r->world_dflt);
1477     }
1478 
1479     r->test_reg = 0;
1480     r->test_reg64 = 0;
1481     r->test_dma_addr = 0;
1482     r->test_dma_size = 0;
1483 
1484     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1485         desc_ring_reset(r->rings[i]);
1486     }
1487 
1488     DPRINTF("Reset done\n");
1489 }
1490 
/*
 * User-configurable properties of the rocker device and the Rocker fields
 * that back them.  pci_rocker_realize() fills in defaults for the ones
 * left unset (name, world, an all-zero start MAC address, a zero
 * switch id).
 */
static Property rocker_properties[] = {
    /* switch name */
    DEFINE_PROP_STRING("name", Rocker, name),
    /* name of the default world */
    DEFINE_PROP_STRING("world", Rocker, world_name),
    /* MAC address of front-panel port 0 */
    DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
                        fp_start_macaddr),
    /* switch id; defaults to 0 */
    DEFINE_PROP_UINT64("switch_id", Rocker,
                       switch_id, 0),
    /* front-panel port peers: count in fp_ports, entries in fp_ports_peers */
    DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
                      fp_ports_peers, qdev_prop_netdev, NICPeers),
    DEFINE_PROP_END_OF_LIST(),
};
1502 
/*
 * VMState description for the device.  It declares no fields and only
 * sets the unmigratable flag.
 */
static const VMStateDescription rocker_vmsd = {
    .name = TYPE_ROCKER,
    .unmigratable = 1,
};
1507 
1508 static void rocker_class_init(ObjectClass *klass, void *data)
1509 {
1510     DeviceClass *dc = DEVICE_CLASS(klass);
1511     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1512 
1513     k->realize = pci_rocker_realize;
1514     k->exit = pci_rocker_uninit;
1515     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1516     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1517     k->revision = ROCKER_PCI_REVISION;
1518     k->class_id = PCI_CLASS_NETWORK_OTHER;
1519     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1520     dc->desc = "Rocker Switch";
1521     dc->reset = rocker_reset;
1522     dc->props = rocker_properties;
1523     dc->vmsd = &rocker_vmsd;
1524 }
1525 
/*
 * Type description for the rocker device: a TYPE_PCI_DEVICE subclass whose
 * instances are Rocker structures, initialised per class by
 * rocker_class_init().
 */
static const TypeInfo rocker_info = {
    .name          = TYPE_ROCKER,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(Rocker),
    .class_init    = rocker_class_init,
    /* interface list, ended by an empty entry */
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
1536 
/* Register the rocker type description (rocker_info). */
static void rocker_register_types(void)
{
    type_register_static(&rocker_info);
}

/* Hand the registration function to type_init(). */
type_init(rocker_register_types)
1543