xref: /qemu/hw/net/rocker/rocker.c (revision 63d2ada2)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "hw/hw.h"
19 #include "hw/pci/pci.h"
20 #include "hw/pci/msix.h"
21 #include "net/net.h"
22 #include "net/eth.h"
23 #include "qemu/iov.h"
24 #include "qemu/bitops.h"
25 #include "qmp-commands.h"
26 
27 #include "rocker.h"
28 #include "rocker_hw.h"
29 #include "rocker_fp.h"
30 #include "rocker_desc.h"
31 #include "rocker_tlv.h"
32 #include "rocker_world.h"
33 #include "rocker_of_dpa.h"
34 
35 struct rocker {
36     /* private */
37     PCIDevice parent_obj;
38     /* public */
39 
40     MemoryRegion mmio;
41     MemoryRegion msix_bar;
42 
43     /* switch configuration */
44     char *name;                  /* switch name */
45     uint32_t fp_ports;           /* front-panel port count */
46     NICPeers *fp_ports_peers;
47     MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
48     uint64_t switch_id;          /* switch id */
49 
50     /* front-panel ports */
51     FpPort *fp_port[ROCKER_FP_PORTS_MAX];
52 
53     /* register backings */
54     uint32_t test_reg;
55     uint64_t test_reg64;
56     dma_addr_t test_dma_addr;
57     uint32_t test_dma_size;
58     uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
59 
60     /* desc rings */
61     DescRing **rings;
62 
63     /* switch worlds */
64     World *worlds[ROCKER_WORLD_TYPE_MAX];
65     World *world_dflt;
66 
67     QLIST_ENTRY(rocker) next;
68 };
69 
70 #define ROCKER "rocker"
71 
72 #define to_rocker(obj) \
73     OBJECT_CHECK(Rocker, (obj), ROCKER)
74 
75 static QLIST_HEAD(, rocker) rockers;
76 
77 Rocker *rocker_find(const char *name)
78 {
79     Rocker *r;
80 
81     QLIST_FOREACH(r, &rockers, next)
82         if (strcmp(r->name, name) == 0) {
83             return r;
84         }
85 
86     return NULL;
87 }
88 
89 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
90 {
91     if (type < ROCKER_WORLD_TYPE_MAX) {
92         return r->worlds[type];
93     }
94     return NULL;
95 }
96 
97 uint32_t rocker_fp_ports(Rocker *r)
98 {
99     return r->fp_ports;
100 }
101 
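/* Rings are laid out as cmd (0), event (1), then one tx/rx pair per
 * front-panel port (see the ring ordering comment in pci_rocker_init).
 * pports are 1-based, so the tx ring at index 2 + 2 * n belongs to
 * pport n + 1.
 */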
102 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
103                                             DescRing *ring)
104 {
105     return (desc_ring_index(ring) - 2) / 2 + 1;
106 }
107 
108 static int tx_consume(Rocker *r, DescInfo *info)
109 {
110     PCIDevice *dev = PCI_DEVICE(r);
111     char *buf = desc_get_buf(info, true);
112     RockerTlv *tlv_frag;
113     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
114     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
115     uint32_t pport;
116     uint32_t port;
117     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
118     uint16_t tx_l3_csum_off = 0;
119     uint16_t tx_tso_mss = 0;
120     uint16_t tx_tso_hdr_len = 0;
121     int iovcnt = 0;
122     int err = ROCKER_OK;
123     int rem;
124     int i;
125 
126     if (!buf) {
127         return -ROCKER_ENXIO;
128     }
129 
130     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
131 
132     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
133         return -ROCKER_EINVAL;
134     }
135 
136     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
137     if (!fp_port_from_pport(pport, &port)) {
138         return -ROCKER_EINVAL;
139     }
140 
141     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
142         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
143     }
144 
145     switch (tx_offload) {
    case ROCKER_TX_OFFLOAD_L3_CSUM:
        if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
            return -ROCKER_EINVAL;
        }
        break;
    case ROCKER_TX_OFFLOAD_TSO:
        if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
            !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
            return -ROCKER_EINVAL;
        }
        break;
    }
156 
157     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
158         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
159     }
160 
161     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
162         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
163     }
164 
165     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
166         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
167     }
168 
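    /* Gather the frame: each ROCKER_TLV_TX_FRAG nest carries a guest DMA
     * address/length pair that is copied into a local iovec before egress.
     */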
169     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
170         hwaddr frag_addr;
171         uint16_t frag_len;
172 
173         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
174             err = -ROCKER_EINVAL;
175             goto err_bad_attr;
176         }
177 
178         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
179 
180         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
181             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
182             err = -ROCKER_EINVAL;
183             goto err_bad_attr;
184         }
185 
186         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
187         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
188 
        /* bounds-check before writing so a guest can't overflow iov[] */
        if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
            err = -ROCKER_EINVAL;
            goto err_too_many_frags;
        }

        iov[iovcnt].iov_len = frag_len;
        iov[iovcnt].iov_base = g_malloc(frag_len);
        if (!iov[iovcnt].iov_base) {
            err = -ROCKER_ENOMEM;
            goto err_no_mem;
        }

        if (pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
                         iov[iovcnt].iov_len)) {
            err = -ROCKER_ENXIO;
            goto err_bad_io;
        }

        iovcnt++;
205     }
206 
207     if (iovcnt) {
208         /* XXX perform Tx offloads */
209         /* XXX   silence compiler for now */
210         tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
211     }
212 
213     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
214 
215 err_too_many_frags:
216 err_bad_io:
217 err_no_mem:
218 err_bad_attr:
219     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
220         if (iov[i].iov_base) {
221             g_free(iov[i].iov_base);
222         }
223     }
224 
225     return err;
226 }
227 
228 static int cmd_get_port_settings(Rocker *r,
229                                  DescInfo *info, char *buf,
230                                  RockerTlv *cmd_info_tlv)
231 {
232     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
233     RockerTlv *nest;
234     FpPort *fp_port;
235     uint32_t pport;
236     uint32_t port;
237     uint32_t speed;
238     uint8_t duplex;
239     uint8_t autoneg;
240     uint8_t learning;
241     MACAddr macaddr;
242     enum rocker_world_type mode;
243     size_t tlv_size;
244     int pos;
245     int err;
246 
247     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
248                             cmd_info_tlv);
249 
250     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
251         return -ROCKER_EINVAL;
252     }
253 
254     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
255     if (!fp_port_from_pport(pport, &port)) {
256         return -ROCKER_EINVAL;
257     }
258     fp_port = r->fp_port[port];
259 
260     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
261     if (err) {
262         return err;
263     }
264 
265     fp_port_get_macaddr(fp_port, &macaddr);
266     mode = world_type(fp_port_get_world(fp_port));
267     learning = fp_port_get_learning(fp_port);
268 
269     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
270                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
271                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
272                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
273                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
274                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
275                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
276                rocker_tlv_total_size(sizeof(uint8_t));    /*   learning */
277 
278     if (tlv_size > desc_buf_size(info)) {
279         return -ROCKER_EMSGSIZE;
280     }
281 
282     pos = 0;
283     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
284     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
285     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
286     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
287     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
288     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
289                    sizeof(macaddr.a), macaddr.a);
290     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
291     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
292                       learning);
293     rocker_tlv_nest_end(buf, &pos, nest);
294 
295     return desc_set_buf(info, tlv_size);
296 }
297 
298 static int cmd_set_port_settings(Rocker *r,
299                                  RockerTlv *cmd_info_tlv)
300 {
301     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
302     FpPort *fp_port;
303     uint32_t pport;
304     uint32_t port;
305     uint32_t speed;
306     uint8_t duplex;
307     uint8_t autoneg;
308     uint8_t learning;
309     MACAddr macaddr;
310     enum rocker_world_type mode;
311     int err;
312 
313     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
314                             cmd_info_tlv);
315 
316     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
317         return -ROCKER_EINVAL;
318     }
319 
320     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
321     if (!fp_port_from_pport(pport, &port)) {
322         return -ROCKER_EINVAL;
323     }
324     fp_port = r->fp_port[port];
325 
326     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
327         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
328         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
329 
330         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
331         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
332         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
333 
334         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
335         if (err) {
336             return err;
337         }
338     }
339 
340     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
341         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
342             sizeof(macaddr.a)) {
343             return -ROCKER_EINVAL;
344         }
345         memcpy(macaddr.a,
346                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
347                sizeof(macaddr.a));
348         fp_port_set_macaddr(fp_port, &macaddr);
349     }
350 
351     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
352         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
353         fp_port_set_world(fp_port, r->worlds[mode]);
354     }
355 
356     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
357         learning =
358             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
359         fp_port_set_learning(fp_port, learning);
360     }
361 
362     return ROCKER_OK;
363 }
364 
365 static int cmd_consume(Rocker *r, DescInfo *info)
366 {
367     char *buf = desc_get_buf(info, false);
368     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
369     RockerTlv *info_tlv;
370     World *world;
371     uint16_t cmd;
372     int err;
373 
374     if (!buf) {
375         return -ROCKER_ENXIO;
376     }
377 
378     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
379 
380     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
381         return -ROCKER_EINVAL;
382     }
383 
384     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
385     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
386 
    /* This might be reworked to something like this:
     * Every world will have an array of command handlers from
     * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. It is
     * up to each world to implement whatever commands it wants.
     * It can reference "generic" commands such as cmd_set_port_settings
     * or cmd_get_port_settings.
     */
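    /* A hypothetical sketch of that shape (not implemented here; the
     * handler-table names are illustrative only):
     *
     *   typedef int (*WorldCmdHandler)(World *world, DescInfo *info,
     *                                  char *buf, RockerTlv *cmd_info_tlv);
     *
     *   const WorldCmdHandler *handlers = world_cmd_handlers(world);
     *   err = handlers[cmd] ? handlers[cmd](world, info, buf, info_tlv)
     *                       : -ROCKER_EINVAL;
     */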
394 
395     switch (cmd) {
396     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
397     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
398     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
399     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
400     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
401     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
402     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
403     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
404         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
405         err = world_do_cmd(world, info, buf, cmd, info_tlv);
406         break;
407     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
408         err = cmd_get_port_settings(r, info, buf, info_tlv);
409         break;
410     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
411         err = cmd_set_port_settings(r, info_tlv);
412         break;
413     default:
414         err = -ROCKER_EINVAL;
415         break;
416     }
417 
418     return err;
419 }
420 
421 static void rocker_msix_irq(Rocker *r, unsigned vector)
422 {
423     PCIDevice *dev = PCI_DEVICE(r);
424 
425     DPRINTF("MSI-X notify request for vector %d\n", vector);
426     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
427         DPRINTF("incorrect vector %d\n", vector);
428         return;
429     }
430     msix_notify(dev, vector);
431 }
432 
433 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
434 {
435     DescRing *ring = r->rings[ROCKER_RING_EVENT];
436     DescInfo *info = desc_ring_fetch_desc(ring);
437     RockerTlv *nest;
438     char *buf;
439     size_t tlv_size;
440     int pos;
441     int err;
442 
443     if (!info) {
444         return -ROCKER_ENOBUFS;
445     }
446 
447     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
448                rocker_tlv_total_size(0) +                 /* nest */
449                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
450                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
451 
452     if (tlv_size > desc_buf_size(info)) {
453         err = -ROCKER_EMSGSIZE;
454         goto err_too_big;
455     }
456 
457     buf = desc_get_buf(info, false);
458     if (!buf) {
459         err = -ROCKER_ENOMEM;
460         goto err_no_mem;
461     }
462 
463     pos = 0;
464     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
465                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
466     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
467     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
468     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
469                       link_up ? 1 : 0);
470     rocker_tlv_nest_end(buf, &pos, nest);
471 
472     err = desc_set_buf(info, tlv_size);
473 
474 err_too_big:
475 err_no_mem:
476     if (desc_ring_post_desc(ring, err)) {
477         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
478     }
479 
480     return err;
481 }
482 
483 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
484                                uint16_t vlan_id)
485 {
486     DescRing *ring = r->rings[ROCKER_RING_EVENT];
487     DescInfo *info;
488     FpPort *fp_port;
489     uint32_t port;
490     RockerTlv *nest;
491     char *buf;
492     size_t tlv_size;
493     int pos;
494     int err;
495 
496     if (!fp_port_from_pport(pport, &port)) {
497         return -ROCKER_EINVAL;
498     }
499     fp_port = r->fp_port[port];
500     if (!fp_port_get_learning(fp_port)) {
501         return ROCKER_OK;
502     }
503 
504     info = desc_ring_fetch_desc(ring);
505     if (!info) {
506         return -ROCKER_ENOBUFS;
507     }
508 
509     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
510                rocker_tlv_total_size(0) +                 /* nest */
511                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
512                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
513                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
514 
515     if (tlv_size > desc_buf_size(info)) {
516         err = -ROCKER_EMSGSIZE;
517         goto err_too_big;
518     }
519 
520     buf = desc_get_buf(info, false);
521     if (!buf) {
522         err = -ROCKER_ENOMEM;
523         goto err_no_mem;
524     }
525 
526     pos = 0;
527     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
528                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
529     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
530     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
531     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
532     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
533     rocker_tlv_nest_end(buf, &pos, nest);
534 
535     err = desc_set_buf(info, tlv_size);
536 
537 err_too_big:
538 err_no_mem:
539     if (desc_ring_post_desc(ring, err)) {
540         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
541     }
542 
543     return err;
544 }
545 
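/* Counterpart of rocker_get_pport_by_tx_ring(): the rx ring for pport
 * n + 1 sits right after its tx ring, at index 2 * n + 3.
 */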
static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
                                             uint32_t pport)
548 {
549     return r->rings[(pport - 1) * 2 + 3];
550 }
551 
552 int rx_produce(World *world, uint32_t pport,
553                const struct iovec *iov, int iovcnt)
554 {
555     Rocker *r = world_rocker(world);
556     PCIDevice *dev = (PCIDevice *)r;
557     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
558     DescInfo *info = desc_ring_fetch_desc(ring);
559     char *data;
560     size_t data_size = iov_size(iov, iovcnt);
561     char *buf;
562     uint16_t rx_flags = 0;
563     uint16_t rx_csum = 0;
564     size_t tlv_size;
565     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
566     hwaddr frag_addr;
567     uint16_t frag_max_len;
568     int pos;
569     int err;
570 
571     if (!info) {
572         return -ROCKER_ENOBUFS;
573     }
574 
575     buf = desc_get_buf(info, false);
576     if (!buf) {
577         err = -ROCKER_ENXIO;
578         goto out;
579     }
580     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
581 
582     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
583         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
584         err = -ROCKER_EINVAL;
585         goto out;
586     }
587 
588     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
589     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
590 
591     if (data_size > frag_max_len) {
592         err = -ROCKER_EMSGSIZE;
593         goto out;
594     }
595 
596     /* XXX calc rx flags/csum */
597 
598     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
               rocker_tlv_total_size(sizeof(uint16_t)) + /* csum */
600                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
601                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
602                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
603 
604     if (tlv_size > desc_buf_size(info)) {
605         err = -ROCKER_EMSGSIZE;
606         goto out;
607     }
608 
    /* TODO:
     * The iov DMA write could be optimized the same way e1000 does it in
     * e1000_receive_iov. But maybe it would make sense to introduce a
     * generic helper iov_dma_write.
     */
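    /* A hypothetical iov_dma_write could look roughly like this (sketch
     * only, not part of the current DMA API):
     *
     *   static int iov_dma_write(PCIDevice *dev, dma_addr_t addr,
     *                            const struct iovec *iov, int iovcnt)
     *   {
     *       int i;
     *
     *       for (i = 0; i < iovcnt; i++) {
     *           if (pci_dma_write(dev, addr, iov[i].iov_base,
     *                             iov[i].iov_len)) {
     *               return -1;
     *           }
     *           addr += iov[i].iov_len;
     *       }
     *       return 0;
     *   }
     */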
614 
615     data = g_malloc(data_size);
616     if (!data) {
617         err = -ROCKER_ENOMEM;
618         goto out;
619     }
620     iov_to_buf(iov, iovcnt, 0, data, data_size);
621     pci_dma_write(dev, frag_addr, data, data_size);
622     g_free(data);
623 
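    /* Rewrite the descriptor TLVs so the guest sees the rx flags/csum and
     * the actual fragment length that was DMA'd.
     */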
624     pos = 0;
625     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
626     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
627     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
628     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
629     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
630 
631     err = desc_set_buf(info, tlv_size);
632 
633 out:
634     if (desc_ring_post_desc(ring, err)) {
635         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
636     }
637 
638     return err;
639 }
640 
641 int rocker_port_eg(Rocker *r, uint32_t pport,
642                    const struct iovec *iov, int iovcnt)
643 {
644     FpPort *fp_port;
645     uint32_t port;
646 
647     if (!fp_port_from_pport(pport, &port)) {
648         return -ROCKER_EINVAL;
649     }
650 
651     fp_port = r->fp_port[port];
652 
653     return fp_port_eg(fp_port, iov, iovcnt);
654 }
655 
656 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
657 {
658     PCIDevice *dev = PCI_DEVICE(r);
659     char *buf;
660     int i;
661 
662     buf = g_malloc(r->test_dma_size);
663 
664     if (!buf) {
665         DPRINTF("test dma buffer alloc failed");
666         return;
667     }
668 
669     switch (val) {
670     case ROCKER_TEST_DMA_CTRL_CLEAR:
671         memset(buf, 0, r->test_dma_size);
672         break;
673     case ROCKER_TEST_DMA_CTRL_FILL:
674         memset(buf, 0x96, r->test_dma_size);
675         break;
676     case ROCKER_TEST_DMA_CTRL_INVERT:
677         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
678         for (i = 0; i < r->test_dma_size; i++) {
679             buf[i] = ~buf[i];
680         }
681         break;
682     default:
683         DPRINTF("not test dma control val=0x%08x\n", val);
684         goto err_out;
685     }
686     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
687 
688     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
689 
690 err_out:
691     g_free(buf);
692 }
693 
694 static void rocker_reset(DeviceState *dev);
695 
696 static void rocker_control(Rocker *r, uint32_t val)
697 {
698     if (val & ROCKER_CONTROL_RESET) {
699         rocker_reset(DEVICE(r));
700     }
701 }
702 
703 static int rocker_pci_ring_count(Rocker *r)
704 {
705     /* There are:
706      * - command ring
707      * - event ring
708      * - tx and rx ring per each port
709      */
710     return 2 + (2 * r->fp_ports);
711 }
712 
713 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
714 {
715     hwaddr start = ROCKER_DMA_DESC_BASE;
716     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
717 
718     return addr >= start && addr < end;
719 }
720 
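/* The port enable register is a bitmap keyed by pport: bit 0 is unused,
 * bit i + 1 enables front-panel port i.
 */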
721 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
722 {
723     int i;
724     bool old_enabled;
725     bool new_enabled;
726     FpPort *fp_port;
727 
728     for (i = 0; i < r->fp_ports; i++) {
729         fp_port = r->fp_port[i];
730         old_enabled = fp_port_enabled(fp_port);
731         new_enabled = (new >> (i + 1)) & 0x1;
732         if (new_enabled == old_enabled) {
733             continue;
734         }
735         if (new_enabled) {
736             fp_port_enable(r->fp_port[i]);
737         } else {
738             fp_port_disable(r->fp_port[i]);
739         }
740     }
741 }
742 
743 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
744 {
745     Rocker *r = opaque;
746 
747     if (rocker_addr_is_desc_reg(r, addr)) {
748         unsigned index = ROCKER_RING_INDEX(addr);
749         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
750 
751         switch (offset) {
752         case ROCKER_DMA_DESC_ADDR_OFFSET:
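            /* The guest writes the 64-bit ring base address as two 32-bit
             * accesses: latch the low half here until the write to
             * offset + 4 completes it.
             */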
753             r->lower32 = (uint64_t)val;
754             break;
755         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
756             desc_ring_set_base_addr(r->rings[index],
757                                     ((uint64_t)val) << 32 | r->lower32);
758             r->lower32 = 0;
759             break;
760         case ROCKER_DMA_DESC_SIZE_OFFSET:
761             desc_ring_set_size(r->rings[index], val);
762             break;
763         case ROCKER_DMA_DESC_HEAD_OFFSET:
764             if (desc_ring_set_head(r->rings[index], val)) {
765                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
766             }
767             break;
768         case ROCKER_DMA_DESC_CTRL_OFFSET:
769             desc_ring_set_ctrl(r->rings[index], val);
770             break;
771         case ROCKER_DMA_DESC_CREDITS_OFFSET:
772             if (desc_ring_ret_credits(r->rings[index], val)) {
773                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
774             }
775             break;
776         default:
777             DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
778                     " val=0x%08x (ring %d, addr=0x%02x)\n",
779                     addr, val, index, offset);
780             break;
781         }
782         return;
783     }
784 
785     switch (addr) {
786     case ROCKER_TEST_REG:
787         r->test_reg = val;
788         break;
789     case ROCKER_TEST_REG64:
790     case ROCKER_TEST_DMA_ADDR:
791     case ROCKER_PORT_PHYS_ENABLE:
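        /* low half of a 64-bit register written as two 32-bit accesses;
         * the matching "+ 4" case below completes the value.
         */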
792         r->lower32 = (uint64_t)val;
793         break;
794     case ROCKER_TEST_REG64 + 4:
795         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
796         r->lower32 = 0;
797         break;
798     case ROCKER_TEST_IRQ:
799         rocker_msix_irq(r, val);
800         break;
801     case ROCKER_TEST_DMA_SIZE:
802         r->test_dma_size = val;
803         break;
804     case ROCKER_TEST_DMA_ADDR + 4:
805         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
806         r->lower32 = 0;
807         break;
808     case ROCKER_TEST_DMA_CTRL:
809         rocker_test_dma_ctrl(r, val);
810         break;
811     case ROCKER_CONTROL:
812         rocker_control(r, val);
813         break;
814     case ROCKER_PORT_PHYS_ENABLE + 4:
815         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
816         r->lower32 = 0;
817         break;
818     default:
819         DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
820                 " val=0x%08x\n", addr, val);
821         break;
822     }
823 }
824 
825 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
826 {
827     Rocker *r = opaque;
828 
829     if (rocker_addr_is_desc_reg(r, addr)) {
830         unsigned index = ROCKER_RING_INDEX(addr);
831         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
832 
833         switch (offset) {
834         case ROCKER_DMA_DESC_ADDR_OFFSET:
835             desc_ring_set_base_addr(r->rings[index], val);
836             break;
837         default:
838             DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
839                     " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
840                     addr, val, index, offset);
841             break;
842         }
843         return;
844     }
845 
846     switch (addr) {
847     case ROCKER_TEST_REG64:
848         r->test_reg64 = val;
849         break;
850     case ROCKER_TEST_DMA_ADDR:
851         r->test_dma_addr = val;
852         break;
853     case ROCKER_PORT_PHYS_ENABLE:
854         rocker_port_phys_enable_write(r, val);
855         break;
856     default:
857         DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
858                 " val=0x" TARGET_FMT_plx "\n", addr, val);
859         break;
860     }
861 }
862 
863 #ifdef DEBUG_ROCKER
864 #define regname(reg) case (reg): return #reg
865 static const char *rocker_reg_name(void *opaque, hwaddr addr)
866 {
867     Rocker *r = opaque;
868 
869     if (rocker_addr_is_desc_reg(r, addr)) {
870         unsigned index = ROCKER_RING_INDEX(addr);
871         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
872         static char buf[100];
873         char ring_name[10];
874 
875         switch (index) {
876         case 0:
877             sprintf(ring_name, "cmd");
878             break;
879         case 1:
880             sprintf(ring_name, "event");
881             break;
882         default:
883             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
884                     (index - 2) / 2);
885         }
886 
887         switch (offset) {
888         case ROCKER_DMA_DESC_ADDR_OFFSET:
889             sprintf(buf, "Ring[%s] ADDR", ring_name);
890             return buf;
891         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
892             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
893             return buf;
894         case ROCKER_DMA_DESC_SIZE_OFFSET:
895             sprintf(buf, "Ring[%s] SIZE", ring_name);
896             return buf;
897         case ROCKER_DMA_DESC_HEAD_OFFSET:
898             sprintf(buf, "Ring[%s] HEAD", ring_name);
899             return buf;
900         case ROCKER_DMA_DESC_TAIL_OFFSET:
901             sprintf(buf, "Ring[%s] TAIL", ring_name);
902             return buf;
903         case ROCKER_DMA_DESC_CTRL_OFFSET:
904             sprintf(buf, "Ring[%s] CTRL", ring_name);
905             return buf;
906         case ROCKER_DMA_DESC_CREDITS_OFFSET:
907             sprintf(buf, "Ring[%s] CREDITS", ring_name);
908             return buf;
909         default:
910             sprintf(buf, "Ring[%s] ???", ring_name);
911             return buf;
912         }
913     } else {
914         switch (addr) {
915             regname(ROCKER_BOGUS_REG0);
916             regname(ROCKER_BOGUS_REG1);
917             regname(ROCKER_BOGUS_REG2);
918             regname(ROCKER_BOGUS_REG3);
919             regname(ROCKER_TEST_REG);
920             regname(ROCKER_TEST_REG64);
921             regname(ROCKER_TEST_REG64+4);
922             regname(ROCKER_TEST_IRQ);
923             regname(ROCKER_TEST_DMA_ADDR);
924             regname(ROCKER_TEST_DMA_ADDR+4);
925             regname(ROCKER_TEST_DMA_SIZE);
926             regname(ROCKER_TEST_DMA_CTRL);
927             regname(ROCKER_CONTROL);
928             regname(ROCKER_PORT_PHYS_COUNT);
929             regname(ROCKER_PORT_PHYS_LINK_STATUS);
930             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
931             regname(ROCKER_PORT_PHYS_ENABLE);
932             regname(ROCKER_PORT_PHYS_ENABLE+4);
933             regname(ROCKER_SWITCH_ID);
934             regname(ROCKER_SWITCH_ID+4);
935         }
936     }
937     return "???";
938 }
939 #else
940 static const char *rocker_reg_name(void *opaque, hwaddr addr)
941 {
942     return NULL;
943 }
944 #endif
945 
946 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
947                               unsigned size)
948 {
949     DPRINTF("Write %s addr " TARGET_FMT_plx
950             ", size %u, val " TARGET_FMT_plx "\n",
951             rocker_reg_name(opaque, addr), addr, size, val);
952 
953     switch (size) {
954     case 4:
955         rocker_io_writel(opaque, addr, val);
956         break;
957     case 8:
958         rocker_io_writeq(opaque, addr, val);
959         break;
960     }
961 }
962 
963 static uint64_t rocker_port_phys_link_status(Rocker *r)
964 {
965     int i;
966     uint64_t status = 0;
967 
968     for (i = 0; i < r->fp_ports; i++) {
969         FpPort *port = r->fp_port[i];
970 
971         if (fp_port_get_link_up(port)) {
            status |= 1ULL << (i + 1);
973         }
974     }
975     return status;
976 }
977 
978 static uint64_t rocker_port_phys_enable_read(Rocker *r)
979 {
980     int i;
981     uint64_t ret = 0;
982 
983     for (i = 0; i < r->fp_ports; i++) {
984         FpPort *port = r->fp_port[i];
985 
986         if (fp_port_enabled(port)) {
            ret |= 1ULL << (i + 1);
988         }
989     }
990     return ret;
991 }
992 
993 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
994 {
995     Rocker *r = opaque;
996     uint32_t ret;
997 
998     if (rocker_addr_is_desc_reg(r, addr)) {
999         unsigned index = ROCKER_RING_INDEX(addr);
1000         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1001 
1002         switch (offset) {
1003         case ROCKER_DMA_DESC_ADDR_OFFSET:
1004             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1005             break;
1006         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1007             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1008             break;
1009         case ROCKER_DMA_DESC_SIZE_OFFSET:
1010             ret = desc_ring_get_size(r->rings[index]);
1011             break;
1012         case ROCKER_DMA_DESC_HEAD_OFFSET:
1013             ret = desc_ring_get_head(r->rings[index]);
1014             break;
1015         case ROCKER_DMA_DESC_TAIL_OFFSET:
1016             ret = desc_ring_get_tail(r->rings[index]);
1017             break;
1018         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1019             ret = desc_ring_get_credits(r->rings[index]);
1020             break;
1021         default:
1022             DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1023                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1024             ret = 0;
1025             break;
1026         }
1027         return ret;
1028     }
1029 
1030     switch (addr) {
1031     case ROCKER_BOGUS_REG0:
1032     case ROCKER_BOGUS_REG1:
1033     case ROCKER_BOGUS_REG2:
1034     case ROCKER_BOGUS_REG3:
1035         ret = 0xDEADBABE;
1036         break;
1037     case ROCKER_TEST_REG:
1038         ret = r->test_reg * 2;
1039         break;
1040     case ROCKER_TEST_REG64:
1041         ret = (uint32_t)(r->test_reg64 * 2);
1042         break;
1043     case ROCKER_TEST_REG64 + 4:
1044         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1045         break;
1046     case ROCKER_TEST_DMA_SIZE:
1047         ret = r->test_dma_size;
1048         break;
1049     case ROCKER_TEST_DMA_ADDR:
1050         ret = (uint32_t)r->test_dma_addr;
1051         break;
1052     case ROCKER_TEST_DMA_ADDR + 4:
1053         ret = (uint32_t)(r->test_dma_addr >> 32);
1054         break;
1055     case ROCKER_PORT_PHYS_COUNT:
1056         ret = r->fp_ports;
1057         break;
1058     case ROCKER_PORT_PHYS_LINK_STATUS:
1059         ret = (uint32_t)rocker_port_phys_link_status(r);
1060         break;
1061     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1062         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1063         break;
1064     case ROCKER_PORT_PHYS_ENABLE:
1065         ret = (uint32_t)rocker_port_phys_enable_read(r);
1066         break;
1067     case ROCKER_PORT_PHYS_ENABLE + 4:
1068         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1069         break;
1070     case ROCKER_SWITCH_ID:
1071         ret = (uint32_t)r->switch_id;
1072         break;
1073     case ROCKER_SWITCH_ID + 4:
1074         ret = (uint32_t)(r->switch_id >> 32);
1075         break;
1076     default:
1077         DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1078         ret = 0;
1079         break;
1080     }
1081     return ret;
1082 }
1083 
1084 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1085 {
1086     Rocker *r = opaque;
1087     uint64_t ret;
1088 
1089     if (rocker_addr_is_desc_reg(r, addr)) {
1090         unsigned index = ROCKER_RING_INDEX(addr);
1091         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1092 
        switch (offset) {
1094         case ROCKER_DMA_DESC_ADDR_OFFSET:
1095             ret = desc_ring_get_base_addr(r->rings[index]);
1096             break;
1097         default:
1098             DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1099                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1100             ret = 0;
1101             break;
1102         }
1103         return ret;
1104     }
1105 
1106     switch (addr) {
1107     case ROCKER_BOGUS_REG0:
1108     case ROCKER_BOGUS_REG2:
1109         ret = 0xDEADBABEDEADBABEULL;
1110         break;
1111     case ROCKER_TEST_REG64:
1112         ret = r->test_reg64 * 2;
1113         break;
1114     case ROCKER_TEST_DMA_ADDR:
1115         ret = r->test_dma_addr;
1116         break;
1117     case ROCKER_PORT_PHYS_LINK_STATUS:
1118         ret = rocker_port_phys_link_status(r);
1119         break;
1120     case ROCKER_PORT_PHYS_ENABLE:
1121         ret = rocker_port_phys_enable_read(r);
1122         break;
1123     case ROCKER_SWITCH_ID:
1124         ret = r->switch_id;
1125         break;
1126     default:
1127         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1128         ret = 0;
1129         break;
1130     }
1131     return ret;
1132 }
1133 
1134 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1135 {
1136     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1137             rocker_reg_name(opaque, addr), addr, size);
1138 
1139     switch (size) {
1140     case 4:
1141         return rocker_io_readl(opaque, addr);
1142     case 8:
1143         return rocker_io_readq(opaque, addr);
1144     }
1145 
1146     return -1;
1147 }
1148 
1149 static const MemoryRegionOps rocker_mmio_ops = {
1150     .read = rocker_mmio_read,
1151     .write = rocker_mmio_write,
1152     .endianness = DEVICE_LITTLE_ENDIAN,
1153     .valid = {
1154         .min_access_size = 4,
1155         .max_access_size = 8,
1156     },
1157     .impl = {
1158         .min_access_size = 4,
1159         .max_access_size = 8,
1160     },
1161 };
1162 
1163 static void rocker_msix_vectors_unuse(Rocker *r,
1164                                       unsigned int num_vectors)
1165 {
1166     PCIDevice *dev = PCI_DEVICE(r);
1167     int i;
1168 
1169     for (i = 0; i < num_vectors; i++) {
1170         msix_vector_unuse(dev, i);
1171     }
1172 }
1173 
1174 static int rocker_msix_vectors_use(Rocker *r,
1175                                    unsigned int num_vectors)
1176 {
1177     PCIDevice *dev = PCI_DEVICE(r);
1178     int err;
1179     int i;
1180 
1181     for (i = 0; i < num_vectors; i++) {
1182         err = msix_vector_use(dev, i);
1183         if (err) {
1184             goto rollback;
1185         }
1186     }
1187     return 0;
1188 
1189 rollback:
1190     rocker_msix_vectors_unuse(r, i);
1191     return err;
1192 }
1193 
1194 static int rocker_msix_init(Rocker *r)
1195 {
1196     PCIDevice *dev = PCI_DEVICE(r);
1197     int err;
1198 
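    /* The MSI-X table and PBA both live in the dedicated MSI-X BAR,
     * at different offsets.
     */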
1199     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1200                     &r->msix_bar,
1201                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1202                     &r->msix_bar,
1203                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1204                     0);
1205     if (err) {
1206         return err;
1207     }
1208 
1209     err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1210     if (err) {
1211         goto err_msix_vectors_use;
1212     }
1213 
1214     return 0;
1215 
1216 err_msix_vectors_use:
1217     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1218     return err;
1219 }
1220 
1221 static void rocker_msix_uninit(Rocker *r)
1222 {
1223     PCIDevice *dev = PCI_DEVICE(r);
1224 
1225     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1226     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1227 }
1228 
1229 static int pci_rocker_init(PCIDevice *dev)
1230 {
1231     Rocker *r = to_rocker(dev);
1232     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1233     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1234     static int sw_index;
1235     int i, err = 0;
1236 
1237     /* allocate worlds */
1238 
1239     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1240     r->world_dflt = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
1241 
1242     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
        if (!r->worlds[i]) {
            err = -ENOMEM;
            goto err_world_alloc;
        }
1246     }
1247 
1248     /* set up memory-mapped region at BAR0 */
1249 
1250     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1251                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1252     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1253                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1254 
1255     /* set up memory-mapped region for MSI-X */
1256 
1257     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1258                        ROCKER_PCI_MSIX_BAR_SIZE);
1259     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1260                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1261 
1262     /* MSI-X init */
1263 
1264     err = rocker_msix_init(r);
1265     if (err) {
1266         goto err_msix_init;
1267     }
1268 
1269     /* validate switch properties */
1270 
1271     if (!r->name) {
1272         r->name = g_strdup(ROCKER);
1273     }
1274 
1275     if (rocker_find(r->name)) {
1276         err = -EEXIST;
1277         goto err_duplicate;
1278     }
1279 
1280     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1281         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1282         r->fp_start_macaddr.a[4] += (sw_index++);
1283     }
1284 
1285     if (!r->switch_id) {
1286         memcpy(&r->switch_id, &r->fp_start_macaddr,
1287                sizeof(r->fp_start_macaddr));
1288     }
1289 
1290     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1291         r->fp_ports = ROCKER_FP_PORTS_MAX;
1292     }
1293 
1294     r->rings = g_malloc(sizeof(DescRing *) * rocker_pci_ring_count(r));
    if (!r->rings) {
        err = -ENOMEM;
        goto err_rings_alloc;
    }
1298 
1299     /* Rings are ordered like this:
1300      * - command ring
1301      * - event ring
1302      * - port0 tx ring
1303      * - port0 rx ring
1304      * - port1 tx ring
1305      * - port1 rx ring
1306      * .....
1307      */
1308 
1309     err = -ENOMEM;
1310     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1311         DescRing *ring = desc_ring_alloc(r, i);
1312 
1313         if (!ring) {
1314             goto err_ring_alloc;
1315         }
1316 
1317         if (i == ROCKER_RING_CMD) {
1318             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1319         } else if (i == ROCKER_RING_EVENT) {
1320             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1321         } else if (i % 2 == 0) {
1322             desc_ring_set_consume(ring, tx_consume,
1323                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1324         } else if (i % 2 == 1) {
1325             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1326         }
1327 
1328         r->rings[i] = ring;
1329     }
1330 
1331     for (i = 0; i < r->fp_ports; i++) {
1332         FpPort *port =
1333             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1334                           i, &r->fp_ports_peers[i]);
1335 
1336         if (!port) {
1337             goto err_port_alloc;
1338         }
1339 
1340         r->fp_port[i] = port;
1341         fp_port_set_world(port, r->world_dflt);
1342     }
1343 
1344     QLIST_INSERT_HEAD(&rockers, r, next);
1345 
1346     return 0;
1347 
1348 err_port_alloc:
1349     for (--i; i >= 0; i--) {
1350         FpPort *port = r->fp_port[i];
1351         fp_port_free(port);
1352     }
1353     i = rocker_pci_ring_count(r);
1354 err_ring_alloc:
1355     for (--i; i >= 0; i--) {
1356         desc_ring_free(r->rings[i]);
1357     }
1358     g_free(r->rings);
1359 err_rings_alloc:
1360 err_duplicate:
1361     rocker_msix_uninit(r);
1362 err_msix_init:
1363     object_unparent(OBJECT(&r->msix_bar));
1364     object_unparent(OBJECT(&r->mmio));
1365 err_world_alloc:
1366     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1367         if (r->worlds[i]) {
1368             world_free(r->worlds[i]);
1369         }
1370     }
1371     return err;
1372 }
1373 
1374 static void pci_rocker_uninit(PCIDevice *dev)
1375 {
1376     Rocker *r = to_rocker(dev);
1377     int i;
1378 
1379     QLIST_REMOVE(r, next);
1380 
1381     for (i = 0; i < r->fp_ports; i++) {
1382         FpPort *port = r->fp_port[i];
1383 
1384         fp_port_free(port);
1385         r->fp_port[i] = NULL;
1386     }
1387 
1388     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1389         if (r->rings[i]) {
1390             desc_ring_free(r->rings[i]);
1391         }
1392     }
1393     g_free(r->rings);
1394 
1395     rocker_msix_uninit(r);
1396     object_unparent(OBJECT(&r->msix_bar));
1397     object_unparent(OBJECT(&r->mmio));
1398 
1399     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1400         if (r->worlds[i]) {
1401             world_free(r->worlds[i]);
1402         }
1403     }
1404     g_free(r->fp_ports_peers);
1405 }
1406 
1407 static void rocker_reset(DeviceState *dev)
1408 {
1409     Rocker *r = to_rocker(dev);
1410     int i;
1411 
1412     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1413         if (r->worlds[i]) {
1414             world_reset(r->worlds[i]);
1415         }
1416     }
1417     for (i = 0; i < r->fp_ports; i++) {
1418         fp_port_reset(r->fp_port[i]);
1419         fp_port_set_world(r->fp_port[i], r->world_dflt);
1420     }
1421 
1422     r->test_reg = 0;
1423     r->test_reg64 = 0;
1424     r->test_dma_addr = 0;
1425     r->test_dma_size = 0;
1426 
1427     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1428         desc_ring_reset(r->rings[i]);
1429     }
1430 
1431     DPRINTF("Reset done\n");
1432 }
1433 
1434 static Property rocker_properties[] = {
1435     DEFINE_PROP_STRING("name", Rocker, name),
1436     DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1437                         fp_start_macaddr),
1438     DEFINE_PROP_UINT64("switch_id", Rocker,
1439                        switch_id, 0),
1440     DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1441                       fp_ports_peers, qdev_prop_netdev, NICPeers),
1442     DEFINE_PROP_END_OF_LIST(),
1443 };
1444 
1445 static const VMStateDescription rocker_vmsd = {
1446     .name = ROCKER,
1447     .unmigratable = 1,
1448 };
1449 
1450 static void rocker_class_init(ObjectClass *klass, void *data)
1451 {
1452     DeviceClass *dc = DEVICE_CLASS(klass);
1453     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1454 
1455     k->init = pci_rocker_init;
1456     k->exit = pci_rocker_uninit;
1457     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1458     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1459     k->revision = ROCKER_PCI_REVISION;
1460     k->class_id = PCI_CLASS_NETWORK_OTHER;
1461     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1462     dc->desc = "Rocker Switch";
1463     dc->reset = rocker_reset;
1464     dc->props = rocker_properties;
1465     dc->vmsd = &rocker_vmsd;
1466 }
1467 
1468 static const TypeInfo rocker_info = {
1469     .name          = ROCKER,
1470     .parent        = TYPE_PCI_DEVICE,
1471     .instance_size = sizeof(Rocker),
1472     .class_init    = rocker_class_init,
1473 };
1474 
1475 static void rocker_register_types(void)
1476 {
1477     type_register_static(&rocker_info);
1478 }
1479 
1480 type_init(rocker_register_types)
1481