xref: /qemu/hw/net/rocker/rocker.c (revision 6402cbbb)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/hw.h"
20 #include "hw/pci/pci.h"
21 #include "hw/pci/msix.h"
22 #include "net/net.h"
23 #include "net/eth.h"
24 #include "qemu/iov.h"
25 #include "qemu/bitops.h"
26 #include "qmp-commands.h"
27 
28 #include "rocker.h"
29 #include "rocker_hw.h"
30 #include "rocker_fp.h"
31 #include "rocker_desc.h"
32 #include "rocker_tlv.h"
33 #include "rocker_world.h"
34 #include "rocker_of_dpa.h"
35 
36 struct rocker {
37     /* private */
38     PCIDevice parent_obj;
39     /* public */
40 
41     MemoryRegion mmio;
42     MemoryRegion msix_bar;
43 
44     /* switch configuration */
45     char *name;                  /* switch name */
46     char *world_name;            /* world name */
47     uint32_t fp_ports;           /* front-panel port count */
48     NICPeers *fp_ports_peers;
49     MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
50     uint64_t switch_id;          /* switch id */
51 
52     /* front-panel ports */
53     FpPort *fp_port[ROCKER_FP_PORTS_MAX];
54 
55     /* register backings */
56     uint32_t test_reg;
57     uint64_t test_reg64;
58     dma_addr_t test_dma_addr;
59     uint32_t test_dma_size;
60     uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
61 
62     /* desc rings */
63     DescRing **rings;
64 
65     /* switch worlds */
66     World *worlds[ROCKER_WORLD_TYPE_MAX];
67     World *world_dflt;
68 
69     QLIST_ENTRY(rocker) next;
70 };
71 
72 #define ROCKER "rocker"
73 
74 #define to_rocker(obj) \
75     OBJECT_CHECK(Rocker, (obj), ROCKER)
76 
77 static QLIST_HEAD(, rocker) rockers;
78 
79 Rocker *rocker_find(const char *name)
80 {
81     Rocker *r;
82 
83     QLIST_FOREACH(r, &rockers, next)
84         if (strcmp(r->name, name) == 0) {
85             return r;
86         }
87 
88     return NULL;
89 }
90 
91 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
92 {
93     if (type < ROCKER_WORLD_TYPE_MAX) {
94         return r->worlds[type];
95     }
96     return NULL;
97 }
98 
99 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
100 {
101     RockerSwitch *rocker;
102     Rocker *r;
103 
104     r = rocker_find(name);
105     if (!r) {
106         error_setg(errp, "rocker %s not found", name);
107         return NULL;
108     }
109 
110     rocker = g_new0(RockerSwitch, 1);
111     rocker->name = g_strdup(r->name);
112     rocker->id = r->switch_id;
113     rocker->ports = r->fp_ports;
114 
115     return rocker;
116 }
117 
118 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
119 {
120     RockerPortList *list = NULL;
121     Rocker *r;
122     int i;
123 
124     r = rocker_find(name);
125     if (!r) {
126         error_setg(errp, "rocker %s not found", name);
127         return NULL;
128     }
129 
130     for (i = r->fp_ports - 1; i >= 0; i--) {
131         RockerPortList *info = g_malloc0(sizeof(*info));
132         info->value = g_malloc0(sizeof(*info->value));
133         struct fp_port *port = r->fp_port[i];
134 
135         fp_port_get_info(port, info);
136         info->next = list;
137         list = info;
138     }
139 
140     return list;
141 }
142 
143 uint32_t rocker_fp_ports(Rocker *r)
144 {
145     return r->fp_ports;
146 }
147 
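/* Descriptor rings are laid out as: ring 0 is the command ring, ring 1 is
 * the event ring, and each front-panel port then gets a tx/rx pair, so
 * port index n (pport n + 1) owns rings 2n + 2 (tx) and 2n + 3 (rx).
 * For example, pport 1 transmits on ring 2 and receives on ring 3.  The
 * helper below inverts that mapping for tx rings.
 */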
148 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
149                                             DescRing *ring)
150 {
151     return (desc_ring_index(ring) - 2) / 2 + 1;
152 }
153 
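/* Consume one tx descriptor posted by the guest driver.  The descriptor
 * buffer is a TLV blob: a ROCKER_TLV_TX_FRAGS nest of ROCKER_TLV_TX_FRAG
 * entries, each carrying a DMA address (ROCKER_TLV_TX_FRAG_ATTR_ADDR) and
 * length (ROCKER_TLV_TX_FRAG_ATTR_LEN), plus optional offload attributes
 * (ROCKER_TLV_TX_OFFLOAD, ..._L3_CSUM_OFF, ..._TSO_MSS, ..._TSO_HDR_LEN).
 * The fragments are DMA-read into a local iovec and egressed on the
 * front-panel port that owns the tx ring.
 */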
154 static int tx_consume(Rocker *r, DescInfo *info)
155 {
156     PCIDevice *dev = PCI_DEVICE(r);
157     char *buf = desc_get_buf(info, true);
158     RockerTlv *tlv_frag;
159     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
160     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
161     uint32_t pport;
162     uint32_t port;
163     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
164     uint16_t tx_l3_csum_off = 0;
165     uint16_t tx_tso_mss = 0;
166     uint16_t tx_tso_hdr_len = 0;
167     int iovcnt = 0;
168     int err = ROCKER_OK;
169     int rem;
170     int i;
171 
172     if (!buf) {
173         return -ROCKER_ENXIO;
174     }
175 
176     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
177 
178     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
179         return -ROCKER_EINVAL;
180     }
181 
182     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
183     if (!fp_port_from_pport(pport, &port)) {
184         return -ROCKER_EINVAL;
185     }
186 
187     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
188         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
189     }
190 
191     switch (tx_offload) {
192     case ROCKER_TX_OFFLOAD_L3_CSUM:
193         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
194             return -ROCKER_EINVAL;
195         }
196         break;
197     case ROCKER_TX_OFFLOAD_TSO:
198         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
199             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
200             return -ROCKER_EINVAL;
201         }
202         break;
203     }
204 
205     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
206         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
207     }
208 
209     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
210         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
211     }
212 
213     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
214         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
215     }
216 
217     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
218         hwaddr frag_addr;
219         uint16_t frag_len;
220 
221         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
222             err = -ROCKER_EINVAL;
223             goto err_bad_attr;
224         }
225 
226         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
227 
228         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
229             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
230             err = -ROCKER_EINVAL;
231             goto err_bad_attr;
232         }
233 
234         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
235         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
236 
237         if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
238             goto err_too_many_frags;
239         }
240         iov[iovcnt].iov_len = frag_len;
241         iov[iovcnt].iov_base = g_malloc(frag_len);
242         if (!iov[iovcnt].iov_base) {
243             err = -ROCKER_ENOMEM;
244             goto err_no_mem;
245         }
246 
247         pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
248                      iov[iovcnt].iov_len);
249 
250         iovcnt++;
251     }
252 
253     if (iovcnt) {
254         /* XXX perform Tx offloads */
255         /* XXX   silence compiler for now */
256         tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
257     }
258 
259     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
260 
261 err_too_many_frags:
262 err_no_mem:
263 err_bad_attr:
264     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
265         g_free(iov[i].iov_base);
266     }
267 
268     return err;
269 }
270 
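/* Handle ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS.  The reply is written back
 * into the same command descriptor buffer as a ROCKER_TLV_CMD_INFO nest
 * containing pport, speed, duplex, autoneg, macaddr, mode, learning and
 * the port's phys name.
 */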
271 static int cmd_get_port_settings(Rocker *r,
272                                  DescInfo *info, char *buf,
273                                  RockerTlv *cmd_info_tlv)
274 {
275     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
276     RockerTlv *nest;
277     FpPort *fp_port;
278     uint32_t pport;
279     uint32_t port;
280     uint32_t speed;
281     uint8_t duplex;
282     uint8_t autoneg;
283     uint8_t learning;
284     char *phys_name;
285     MACAddr macaddr;
286     enum rocker_world_type mode;
287     size_t tlv_size;
288     int pos;
289     int err;
290 
291     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
292                             cmd_info_tlv);
293 
294     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
295         return -ROCKER_EINVAL;
296     }
297 
298     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
299     if (!fp_port_from_pport(pport, &port)) {
300         return -ROCKER_EINVAL;
301     }
302     fp_port = r->fp_port[port];
303 
304     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
305     if (err) {
306         return err;
307     }
308 
309     fp_port_get_macaddr(fp_port, &macaddr);
310     mode = world_type(fp_port_get_world(fp_port));
311     learning = fp_port_get_learning(fp_port);
312     phys_name = fp_port_get_name(fp_port);
313 
314     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
315                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
316                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
317                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
318                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
319                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
320                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
321                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
322                rocker_tlv_total_size(strlen(phys_name));
323 
324     if (tlv_size > desc_buf_size(info)) {
325         return -ROCKER_EMSGSIZE;
326     }
327 
328     pos = 0;
329     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
330     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
331     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
332     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
333     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
334     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
335                    sizeof(macaddr.a), macaddr.a);
336     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
337     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
338                       learning);
339     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
340                    strlen(phys_name), phys_name);
341     rocker_tlv_nest_end(buf, &pos, nest);
342 
343     return desc_set_buf(info, tlv_size);
344 }
345 
346 static int cmd_set_port_settings(Rocker *r,
347                                  RockerTlv *cmd_info_tlv)
348 {
349     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
350     FpPort *fp_port;
351     uint32_t pport;
352     uint32_t port;
353     uint32_t speed;
354     uint8_t duplex;
355     uint8_t autoneg;
356     uint8_t learning;
357     MACAddr macaddr;
358     enum rocker_world_type mode;
359     int err;
360 
361     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
362                             cmd_info_tlv);
363 
364     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
365         return -ROCKER_EINVAL;
366     }
367 
368     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
369     if (!fp_port_from_pport(pport, &port)) {
370         return -ROCKER_EINVAL;
371     }
372     fp_port = r->fp_port[port];
373 
374     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
375         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
376         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
377 
378         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
379         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
380         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
381 
382         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
383         if (err) {
384             return err;
385         }
386     }
387 
388     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
389         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
390             sizeof(macaddr.a)) {
391             return -ROCKER_EINVAL;
392         }
393         memcpy(macaddr.a,
394                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
395                sizeof(macaddr.a));
396         fp_port_set_macaddr(fp_port, &macaddr);
397     }
398 
399     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
400         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
401         if (mode >= ROCKER_WORLD_TYPE_MAX) {
402             return -ROCKER_EINVAL;
403         }
404         /* We don't support world change. */
405         if (!fp_port_check_world(fp_port, r->worlds[mode])) {
406             return -ROCKER_EINVAL;
407         }
408     }
409 
410     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
411         learning =
412             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
413         fp_port_set_learning(fp_port, learning);
414     }
415 
416     return ROCKER_OK;
417 }
418 
419 static int cmd_consume(Rocker *r, DescInfo *info)
420 {
421     char *buf = desc_get_buf(info, false);
422     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
423     RockerTlv *info_tlv;
424     World *world;
425     uint16_t cmd;
426     int err;
427 
428     if (!buf) {
429         return -ROCKER_ENXIO;
430     }
431 
432     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
433 
434     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
435         return -ROCKER_EINVAL;
436     }
437 
438     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
439     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
440 
441     /* This might be reworked to something like this:
442      * Every world will have an array of command handlers from
443      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. It is
444      * up to each world to implement whatever commands it wants.
445      * It can reference "generic" commands such as cmd_set_port_settings
446      * or cmd_get_port_settings.
447      */
448 
449     switch (cmd) {
450     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
451     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
452     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
453     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
454     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
455     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
456     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
457     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
458         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
459         err = world_do_cmd(world, info, buf, cmd, info_tlv);
460         break;
461     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
462         err = cmd_get_port_settings(r, info, buf, info_tlv);
463         break;
464     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
465         err = cmd_set_port_settings(r, info_tlv);
466         break;
467     default:
468         err = -ROCKER_EINVAL;
469         break;
470     }
471 
472     return err;
473 }
474 
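/* Fire the MSI-X vector associated with a ring or event source.  The
 * vector layout (command, event, test, and per-port tx/rx vectors) is
 * given by the ROCKER_MSIX_VEC_* macros (see rocker_hw.h); anything at or
 * beyond ROCKER_MSIX_VEC_COUNT(fp_ports) is rejected here.
 */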
475 static void rocker_msix_irq(Rocker *r, unsigned vector)
476 {
477     PCIDevice *dev = PCI_DEVICE(r);
478 
479     DPRINTF("MSI-X notify request for vector %d\n", vector);
480     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
481         DPRINTF("incorrect vector %d\n", vector);
482         return;
483     }
484     msix_notify(dev, vector);
485 }
486 
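/* Post a link-changed event to the event ring: a ROCKER_TLV_EVENT_TYPE of
 * LINK_CHANGED followed by an EVENT_INFO nest with the pport and a linkup
 * flag.  The event MSI-X vector is raised when the descriptor is posted
 * back to the guest.
 */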
487 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
488 {
489     DescRing *ring = r->rings[ROCKER_RING_EVENT];
490     DescInfo *info = desc_ring_fetch_desc(ring);
491     RockerTlv *nest;
492     char *buf;
493     size_t tlv_size;
494     int pos;
495     int err;
496 
497     if (!info) {
498         return -ROCKER_ENOBUFS;
499     }
500 
501     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
502                rocker_tlv_total_size(0) +                 /* nest */
503                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
504                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
505 
506     if (tlv_size > desc_buf_size(info)) {
507         err = -ROCKER_EMSGSIZE;
508         goto err_too_big;
509     }
510 
511     buf = desc_get_buf(info, false);
512     if (!buf) {
513         err = -ROCKER_ENOMEM;
514         goto err_no_mem;
515     }
516 
517     pos = 0;
518     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
519                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
520     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
521     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
522     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
523                       link_up ? 1 : 0);
524     rocker_tlv_nest_end(buf, &pos, nest);
525 
526     err = desc_set_buf(info, tlv_size);
527 
528 err_too_big:
529 err_no_mem:
530     if (desc_ring_post_desc(ring, err)) {
531         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
532     }
533 
534     return err;
535 }
536 
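/* Post a "MAC/VLAN seen" learning event, analogous to the link-changed
 * event above.  Nothing is generated unless learning is enabled on the
 * source port.
 */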
537 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
538                                uint16_t vlan_id)
539 {
540     DescRing *ring = r->rings[ROCKER_RING_EVENT];
541     DescInfo *info;
542     FpPort *fp_port;
543     uint32_t port;
544     RockerTlv *nest;
545     char *buf;
546     size_t tlv_size;
547     int pos;
548     int err;
549 
550     if (!fp_port_from_pport(pport, &port)) {
551         return -ROCKER_EINVAL;
552     }
553     fp_port = r->fp_port[port];
554     if (!fp_port_get_learning(fp_port)) {
555         return ROCKER_OK;
556     }
557 
558     info = desc_ring_fetch_desc(ring);
559     if (!info) {
560         return -ROCKER_ENOBUFS;
561     }
562 
563     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
564                rocker_tlv_total_size(0) +                 /* nest */
565                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
566                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
567                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
568 
569     if (tlv_size > desc_buf_size(info)) {
570         err = -ROCKER_EMSGSIZE;
571         goto err_too_big;
572     }
573 
574     buf = desc_get_buf(info, false);
575     if (!buf) {
576         err = -ROCKER_ENOMEM;
577         goto err_no_mem;
578     }
579 
580     pos = 0;
581     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
582                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
583     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
584     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
585     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
586     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
587     rocker_tlv_nest_end(buf, &pos, nest);
588 
589     err = desc_set_buf(info, tlv_size);
590 
591 err_too_big:
592 err_no_mem:
593     if (desc_ring_post_desc(ring, err)) {
594         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
595     }
596 
597     return err;
598 }
599 
600 static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
601                                              uint32_t pport)
602 {
603     return r->rings[(pport - 1) * 2 + 3];
604 }
605 
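/* Deliver a received frame to the guest.  The driver pre-posts rx
 * descriptors whose TLV buffer carries a DMA address and maximum length
 * (ROCKER_TLV_RX_FRAG_ADDR / ROCKER_TLV_RX_FRAG_MAX_LEN); the device
 * DMA-writes the packet there and rewrites the descriptor TLVs (flags,
 * csum, frag addr/max len and the actual frag len) before posting the
 * descriptor back and raising the port's rx MSI-X vector.
 */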
606 int rx_produce(World *world, uint32_t pport,
607                const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
608 {
609     Rocker *r = world_rocker(world);
610     PCIDevice *dev = (PCIDevice *)r;
611     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
612     DescInfo *info = desc_ring_fetch_desc(ring);
613     char *data;
614     size_t data_size = iov_size(iov, iovcnt);
615     char *buf;
616     uint16_t rx_flags = 0;
617     uint16_t rx_csum = 0;
618     size_t tlv_size;
619     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
620     hwaddr frag_addr;
621     uint16_t frag_max_len;
622     int pos;
623     int err;
624 
625     if (!info) {
626         return -ROCKER_ENOBUFS;
627     }
628 
629     buf = desc_get_buf(info, false);
630     if (!buf) {
631         err = -ROCKER_ENXIO;
632         goto out;
633     }
634     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
635 
636     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
637         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
638         err = -ROCKER_EINVAL;
639         goto out;
640     }
641 
642     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
643     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
644 
645     if (data_size > frag_max_len) {
646         err = -ROCKER_EMSGSIZE;
647         goto out;
648     }
649 
650     if (copy_to_cpu) {
651         rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
652     }
653 
654     /* XXX calc rx flags/csum */
655 
656     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
657                rocker_tlv_total_size(sizeof(uint16_t)) + /* csum */
658                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
659                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
660                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
661 
662     if (tlv_size > desc_buf_size(info)) {
663         err = -ROCKER_EMSGSIZE;
664         goto out;
665     }
666 
667     /* TODO:
668      * The iov dma write can be optimized in a similar way to what e1000
669      * does in e1000_receive_iov. But maybe it would make sense to
670      * introduce a generic helper, iov_dma_write.
671      */
672 
673     data = g_malloc(data_size);
674     if (!data) {
675         err = -ROCKER_ENOMEM;
676         goto out;
677     }
678     iov_to_buf(iov, iovcnt, 0, data, data_size);
679     pci_dma_write(dev, frag_addr, data, data_size);
680     g_free(data);
681 
682     pos = 0;
683     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
684     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
685     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
686     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
687     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
688 
689     err = desc_set_buf(info, tlv_size);
690 
691 out:
692     if (desc_ring_post_desc(ring, err)) {
693         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
694     }
695 
696     return err;
697 }
698 
699 int rocker_port_eg(Rocker *r, uint32_t pport,
700                    const struct iovec *iov, int iovcnt)
701 {
702     FpPort *fp_port;
703     uint32_t port;
704 
705     if (!fp_port_from_pport(pport, &port)) {
706         return -ROCKER_EINVAL;
707     }
708 
709     fp_port = r->fp_port[port];
710 
711     return fp_port_eg(fp_port, iov, iovcnt);
712 }
713 
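/* Test DMA support, presumably used by driver self-tests: the driver
 * programs ROCKER_TEST_DMA_ADDR/SIZE and then writes a command to
 * ROCKER_TEST_DMA_CTRL.  CLEAR and FILL write a constant pattern to the
 * test buffer; INVERT reads the buffer back, flips every byte and writes
 * it out again.  Completion is signalled on the TEST MSI-X vector.
 */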
714 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
715 {
716     PCIDevice *dev = PCI_DEVICE(r);
717     char *buf;
718     int i;
719 
720     buf = g_malloc(r->test_dma_size);
721 
722     if (!buf) {
723         DPRINTF("test dma buffer alloc failed");
724         return;
725     }
726 
727     switch (val) {
728     case ROCKER_TEST_DMA_CTRL_CLEAR:
729         memset(buf, 0, r->test_dma_size);
730         break;
731     case ROCKER_TEST_DMA_CTRL_FILL:
732         memset(buf, 0x96, r->test_dma_size);
733         break;
734     case ROCKER_TEST_DMA_CTRL_INVERT:
735         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
736         for (i = 0; i < r->test_dma_size; i++) {
737             buf[i] = ~buf[i];
738         }
739         break;
740     default:
741         DPRINTF("unknown test dma control val=0x%08x\n", val);
742         goto err_out;
743     }
744     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
745 
746     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
747 
748 err_out:
749     g_free(buf);
750 }
751 
752 static void rocker_reset(DeviceState *dev);
753 
754 static void rocker_control(Rocker *r, uint32_t val)
755 {
756     if (val & ROCKER_CONTROL_RESET) {
757         rocker_reset(DEVICE(r));
758     }
759 }
760 
761 static int rocker_pci_ring_count(Rocker *r)
762 {
763     /* There are:
764      * - command ring
765      * - event ring
766      * - a tx and an rx ring for each port
767      */
768     return 2 + (2 * r->fp_ports);
769 }
770 
771 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
772 {
773     hwaddr start = ROCKER_DMA_DESC_BASE;
774     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
775 
776     return addr >= start && addr < end;
777 }
778 
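/* The port enable (and link status) registers are 64-bit bitmaps indexed
 * by pport, so bit (i + 1) corresponds to front-panel port index i and
 * bit 0 is unused.  For example, writing 0x6 enables ports 0 and 1.
 */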
779 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
780 {
781     int i;
782     bool old_enabled;
783     bool new_enabled;
784     FpPort *fp_port;
785 
786     for (i = 0; i < r->fp_ports; i++) {
787         fp_port = r->fp_port[i];
788         old_enabled = fp_port_enabled(fp_port);
789         new_enabled = (new >> (i + 1)) & 0x1;
790         if (new_enabled == old_enabled) {
791             continue;
792         }
793         if (new_enabled) {
794             fp_port_enable(r->fp_port[i]);
795         } else {
796             fp_port_disable(r->fp_port[i]);
797         }
798     }
799 }
800 
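/* 32-bit access path.  64-bit registers (ring base addresses, TEST_REG64,
 * TEST_DMA_ADDR, PORT_PHYS_ENABLE) may be written as two 32-bit halves:
 * the low word is latched in r->lower32 and the register is committed,
 * and lower32 cleared, when the high word (offset + 4) arrives.
 * Illustrative driver-side sequence (hypothetical helper names):
 *
 *     writel(lower_32_bits(addr), base + ROCKER_DMA_DESC_ADDR_OFFSET);
 *     writel(upper_32_bits(addr), base + ROCKER_DMA_DESC_ADDR_OFFSET + 4);
 */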
801 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
802 {
803     Rocker *r = opaque;
804 
805     if (rocker_addr_is_desc_reg(r, addr)) {
806         unsigned index = ROCKER_RING_INDEX(addr);
807         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
808 
809         switch (offset) {
810         case ROCKER_DMA_DESC_ADDR_OFFSET:
811             r->lower32 = (uint64_t)val;
812             break;
813         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
814             desc_ring_set_base_addr(r->rings[index],
815                                     ((uint64_t)val) << 32 | r->lower32);
816             r->lower32 = 0;
817             break;
818         case ROCKER_DMA_DESC_SIZE_OFFSET:
819             desc_ring_set_size(r->rings[index], val);
820             break;
821         case ROCKER_DMA_DESC_HEAD_OFFSET:
822             if (desc_ring_set_head(r->rings[index], val)) {
823                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
824             }
825             break;
826         case ROCKER_DMA_DESC_CTRL_OFFSET:
827             desc_ring_set_ctrl(r->rings[index], val);
828             break;
829         case ROCKER_DMA_DESC_CREDITS_OFFSET:
830             if (desc_ring_ret_credits(r->rings[index], val)) {
831                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
832             }
833             break;
834         default:
835             DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
836                     " val=0x%08x (ring %d, offset=0x%02x)\n",
837                     addr, val, index, offset);
838             break;
839         }
840         return;
841     }
842 
843     switch (addr) {
844     case ROCKER_TEST_REG:
845         r->test_reg = val;
846         break;
847     case ROCKER_TEST_REG64:
848     case ROCKER_TEST_DMA_ADDR:
849     case ROCKER_PORT_PHYS_ENABLE:
850         r->lower32 = (uint64_t)val;
851         break;
852     case ROCKER_TEST_REG64 + 4:
853         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
854         r->lower32 = 0;
855         break;
856     case ROCKER_TEST_IRQ:
857         rocker_msix_irq(r, val);
858         break;
859     case ROCKER_TEST_DMA_SIZE:
860         r->test_dma_size = val & 0xFFFF;
861         break;
862     case ROCKER_TEST_DMA_ADDR + 4:
863         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
864         r->lower32 = 0;
865         break;
866     case ROCKER_TEST_DMA_CTRL:
867         rocker_test_dma_ctrl(r, val);
868         break;
869     case ROCKER_CONTROL:
870         rocker_control(r, val);
871         break;
872     case ROCKER_PORT_PHYS_ENABLE + 4:
873         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
874         r->lower32 = 0;
875         break;
876     default:
877         DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
878                 " val=0x%08x\n", addr, val);
879         break;
880     }
881 }
882 
883 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
884 {
885     Rocker *r = opaque;
886 
887     if (rocker_addr_is_desc_reg(r, addr)) {
888         unsigned index = ROCKER_RING_INDEX(addr);
889         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
890 
891         switch (offset) {
892         case ROCKER_DMA_DESC_ADDR_OFFSET:
893             desc_ring_set_base_addr(r->rings[index], val);
894             break;
895         default:
896             DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
897                     " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
898                     addr, val, index, offset);
899             break;
900         }
901         return;
902     }
903 
904     switch (addr) {
905     case ROCKER_TEST_REG64:
906         r->test_reg64 = val;
907         break;
908     case ROCKER_TEST_DMA_ADDR:
909         r->test_dma_addr = val;
910         break;
911     case ROCKER_PORT_PHYS_ENABLE:
912         rocker_port_phys_enable_write(r, val);
913         break;
914     default:
915         DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
916                 " val=0x" TARGET_FMT_plx "\n", addr, val);
917         break;
918     }
919 }
920 
921 #ifdef DEBUG_ROCKER
922 #define regname(reg) case (reg): return #reg
923 static const char *rocker_reg_name(void *opaque, hwaddr addr)
924 {
925     Rocker *r = opaque;
926 
927     if (rocker_addr_is_desc_reg(r, addr)) {
928         unsigned index = ROCKER_RING_INDEX(addr);
929         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
930         static char buf[100];
931         char ring_name[10];
932 
933         switch (index) {
934         case 0:
935             sprintf(ring_name, "cmd");
936             break;
937         case 1:
938             sprintf(ring_name, "event");
939             break;
940         default:
941             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
942                     (index - 2) / 2);
943         }
944 
945         switch (offset) {
946         case ROCKER_DMA_DESC_ADDR_OFFSET:
947             sprintf(buf, "Ring[%s] ADDR", ring_name);
948             return buf;
949         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
950             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
951             return buf;
952         case ROCKER_DMA_DESC_SIZE_OFFSET:
953             sprintf(buf, "Ring[%s] SIZE", ring_name);
954             return buf;
955         case ROCKER_DMA_DESC_HEAD_OFFSET:
956             sprintf(buf, "Ring[%s] HEAD", ring_name);
957             return buf;
958         case ROCKER_DMA_DESC_TAIL_OFFSET:
959             sprintf(buf, "Ring[%s] TAIL", ring_name);
960             return buf;
961         case ROCKER_DMA_DESC_CTRL_OFFSET:
962             sprintf(buf, "Ring[%s] CTRL", ring_name);
963             return buf;
964         case ROCKER_DMA_DESC_CREDITS_OFFSET:
965             sprintf(buf, "Ring[%s] CREDITS", ring_name);
966             return buf;
967         default:
968             sprintf(buf, "Ring[%s] ???", ring_name);
969             return buf;
970         }
971     } else {
972         switch (addr) {
973             regname(ROCKER_BOGUS_REG0);
974             regname(ROCKER_BOGUS_REG1);
975             regname(ROCKER_BOGUS_REG2);
976             regname(ROCKER_BOGUS_REG3);
977             regname(ROCKER_TEST_REG);
978             regname(ROCKER_TEST_REG64);
979             regname(ROCKER_TEST_REG64+4);
980             regname(ROCKER_TEST_IRQ);
981             regname(ROCKER_TEST_DMA_ADDR);
982             regname(ROCKER_TEST_DMA_ADDR+4);
983             regname(ROCKER_TEST_DMA_SIZE);
984             regname(ROCKER_TEST_DMA_CTRL);
985             regname(ROCKER_CONTROL);
986             regname(ROCKER_PORT_PHYS_COUNT);
987             regname(ROCKER_PORT_PHYS_LINK_STATUS);
988             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
989             regname(ROCKER_PORT_PHYS_ENABLE);
990             regname(ROCKER_PORT_PHYS_ENABLE+4);
991             regname(ROCKER_SWITCH_ID);
992             regname(ROCKER_SWITCH_ID+4);
993         }
994     }
995     return "???";
996 }
997 #else
998 static const char *rocker_reg_name(void *opaque, hwaddr addr)
999 {
1000     return NULL;
1001 }
1002 #endif
1003 
1004 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1005                               unsigned size)
1006 {
1007     DPRINTF("Write %s addr " TARGET_FMT_plx
1008             ", size %u, val " TARGET_FMT_plx "\n",
1009             rocker_reg_name(opaque, addr), addr, size, val);
1010 
1011     switch (size) {
1012     case 4:
1013         rocker_io_writel(opaque, addr, val);
1014         break;
1015     case 8:
1016         rocker_io_writeq(opaque, addr, val);
1017         break;
1018     }
1019 }
1020 
1021 static uint64_t rocker_port_phys_link_status(Rocker *r)
1022 {
1023     int i;
1024     uint64_t status = 0;
1025 
1026     for (i = 0; i < r->fp_ports; i++) {
1027         FpPort *port = r->fp_port[i];
1028 
1029         if (fp_port_get_link_up(port)) {
1030             status |= 1 << (i + 1);
1031         }
1032     }
1033     return status;
1034 }
1035 
1036 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1037 {
1038     int i;
1039     uint64_t ret = 0;
1040 
1041     for (i = 0; i < r->fp_ports; i++) {
1042         FpPort *port = r->fp_port[i];
1043 
1044         if (fp_port_enabled(port)) {
1045             ret |= 1 << (i + 1);
1046         }
1047     }
1048     return ret;
1049 }
1050 
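/* 32-bit read path.  Note the test registers read back as twice the value
 * last written (see ROCKER_TEST_REG/REG64 below), which presumably lets
 * the driver verify both directions of register access.
 */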
1051 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1052 {
1053     Rocker *r = opaque;
1054     uint32_t ret;
1055 
1056     if (rocker_addr_is_desc_reg(r, addr)) {
1057         unsigned index = ROCKER_RING_INDEX(addr);
1058         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1059 
1060         switch (offset) {
1061         case ROCKER_DMA_DESC_ADDR_OFFSET:
1062             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1063             break;
1064         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1065             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1066             break;
1067         case ROCKER_DMA_DESC_SIZE_OFFSET:
1068             ret = desc_ring_get_size(r->rings[index]);
1069             break;
1070         case ROCKER_DMA_DESC_HEAD_OFFSET:
1071             ret = desc_ring_get_head(r->rings[index]);
1072             break;
1073         case ROCKER_DMA_DESC_TAIL_OFFSET:
1074             ret = desc_ring_get_tail(r->rings[index]);
1075             break;
1076         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1077             ret = desc_ring_get_credits(r->rings[index]);
1078             break;
1079         default:
1080             DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1081                     " (ring %d, offset=0x%02x)\n", addr, index, offset);
1082             ret = 0;
1083             break;
1084         }
1085         return ret;
1086     }
1087 
1088     switch (addr) {
1089     case ROCKER_BOGUS_REG0:
1090     case ROCKER_BOGUS_REG1:
1091     case ROCKER_BOGUS_REG2:
1092     case ROCKER_BOGUS_REG3:
1093         ret = 0xDEADBABE;
1094         break;
1095     case ROCKER_TEST_REG:
1096         ret = r->test_reg * 2;
1097         break;
1098     case ROCKER_TEST_REG64:
1099         ret = (uint32_t)(r->test_reg64 * 2);
1100         break;
1101     case ROCKER_TEST_REG64 + 4:
1102         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1103         break;
1104     case ROCKER_TEST_DMA_SIZE:
1105         ret = r->test_dma_size;
1106         break;
1107     case ROCKER_TEST_DMA_ADDR:
1108         ret = (uint32_t)r->test_dma_addr;
1109         break;
1110     case ROCKER_TEST_DMA_ADDR + 4:
1111         ret = (uint32_t)(r->test_dma_addr >> 32);
1112         break;
1113     case ROCKER_PORT_PHYS_COUNT:
1114         ret = r->fp_ports;
1115         break;
1116     case ROCKER_PORT_PHYS_LINK_STATUS:
1117         ret = (uint32_t)rocker_port_phys_link_status(r);
1118         break;
1119     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1120         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1121         break;
1122     case ROCKER_PORT_PHYS_ENABLE:
1123         ret = (uint32_t)rocker_port_phys_enable_read(r);
1124         break;
1125     case ROCKER_PORT_PHYS_ENABLE + 4:
1126         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1127         break;
1128     case ROCKER_SWITCH_ID:
1129         ret = (uint32_t)r->switch_id;
1130         break;
1131     case ROCKER_SWITCH_ID + 4:
1132         ret = (uint32_t)(r->switch_id >> 32);
1133         break;
1134     default:
1135         DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1136         ret = 0;
1137         break;
1138     }
1139     return ret;
1140 }
1141 
1142 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1143 {
1144     Rocker *r = opaque;
1145     uint64_t ret;
1146 
1147     if (rocker_addr_is_desc_reg(r, addr)) {
1148         unsigned index = ROCKER_RING_INDEX(addr);
1149         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1150 
1151         switch (offset) {
1152         case ROCKER_DMA_DESC_ADDR_OFFSET:
1153             ret = desc_ring_get_base_addr(r->rings[index]);
1154             break;
1155         default:
1156             DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1157                     " (ring %d, offset=0x%02x)\n", addr, index, offset);
1158             ret = 0;
1159             break;
1160         }
1161         return ret;
1162     }
1163 
1164     switch (addr) {
1165     case ROCKER_BOGUS_REG0:
1166     case ROCKER_BOGUS_REG2:
1167         ret = 0xDEADBABEDEADBABEULL;
1168         break;
1169     case ROCKER_TEST_REG64:
1170         ret = r->test_reg64 * 2;
1171         break;
1172     case ROCKER_TEST_DMA_ADDR:
1173         ret = r->test_dma_addr;
1174         break;
1175     case ROCKER_PORT_PHYS_LINK_STATUS:
1176         ret = rocker_port_phys_link_status(r);
1177         break;
1178     case ROCKER_PORT_PHYS_ENABLE:
1179         ret = rocker_port_phys_enable_read(r);
1180         break;
1181     case ROCKER_SWITCH_ID:
1182         ret = r->switch_id;
1183         break;
1184     default:
1185         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1186         ret = 0;
1187         break;
1188     }
1189     return ret;
1190 }
1191 
1192 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1193 {
1194     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1195             rocker_reg_name(opaque, addr), addr, size);
1196 
1197     switch (size) {
1198     case 4:
1199         return rocker_io_readl(opaque, addr);
1200     case 8:
1201         return rocker_io_readq(opaque, addr);
1202     }
1203 
1204     return -1;
1205 }
1206 
1207 static const MemoryRegionOps rocker_mmio_ops = {
1208     .read = rocker_mmio_read,
1209     .write = rocker_mmio_write,
1210     .endianness = DEVICE_LITTLE_ENDIAN,
1211     .valid = {
1212         .min_access_size = 4,
1213         .max_access_size = 8,
1214     },
1215     .impl = {
1216         .min_access_size = 4,
1217         .max_access_size = 8,
1218     },
1219 };
1220 
1221 static void rocker_msix_vectors_unuse(Rocker *r,
1222                                       unsigned int num_vectors)
1223 {
1224     PCIDevice *dev = PCI_DEVICE(r);
1225     int i;
1226 
1227     for (i = 0; i < num_vectors; i++) {
1228         msix_vector_unuse(dev, i);
1229     }
1230 }
1231 
1232 static int rocker_msix_vectors_use(Rocker *r,
1233                                    unsigned int num_vectors)
1234 {
1235     PCIDevice *dev = PCI_DEVICE(r);
1236     int err;
1237     int i;
1238 
1239     for (i = 0; i < num_vectors; i++) {
1240         err = msix_vector_use(dev, i);
1241         if (err) {
1242             goto rollback;
1243         }
1244     }
1245     return 0;
1246 
1247 rollback:
1248     rocker_msix_vectors_unuse(r, i);
1249     return err;
1250 }
1251 
1252 static int rocker_msix_init(Rocker *r)
1253 {
1254     PCIDevice *dev = PCI_DEVICE(r);
1255     int err;
1256     Error *local_err = NULL;
1257 
1258     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1259                     &r->msix_bar,
1260                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1261                     &r->msix_bar,
1262                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1263                     0, &local_err);
1264     if (err) {
1265         error_report_err(local_err);
1266         return err;
1267     }
1268 
1269     err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1270     if (err) {
1271         goto err_msix_vectors_use;
1272     }
1273 
1274     return 0;
1275 
1276 err_msix_vectors_use:
1277     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1278     return err;
1279 }
1280 
1281 static void rocker_msix_uninit(Rocker *r)
1282 {
1283     PCIDevice *dev = PCI_DEVICE(r);
1284 
1285     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1286     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1287 }
1288 
1289 static World *rocker_world_type_by_name(Rocker *r, const char *name)
1290 {
1291     int i;
1292 
1293     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1294         if (strcmp(name, world_name(r->worlds[i])) == 0) {
1295             return r->worlds[i];
1296         }
1297     }
1298     return NULL;
1299 }
1300 
1301 static int pci_rocker_init(PCIDevice *dev)
1302 {
1303     Rocker *r = to_rocker(dev);
1304     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1305     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1306     static int sw_index;
1307     int i, err = 0;
1308 
1309     /* allocate worlds */
1310 
1311     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1312 
1313     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1314         if (!r->worlds[i]) {
1315             err = -ENOMEM;
1316             goto err_world_alloc;
1317         }
1318     }
1319 
1320     if (!r->world_name) {
1321         r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1322     }
1323 
1324     r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1325     if (!r->world_dflt) {
1326         fprintf(stderr,
1327                 "rocker: requested world \"%s\" does not exist\n",
1328                 r->world_name);
1329         err = -EINVAL;
1330         goto err_world_type_by_name;
1331     }
1332 
1333     /* set up memory-mapped region at BAR0 */
1334 
1335     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1336                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1337     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1338                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1339 
1340     /* set up memory-mapped region for MSI-X */
1341 
1342     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1343                        ROCKER_PCI_MSIX_BAR_SIZE);
1344     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1345                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1346 
1347     /* MSI-X init */
1348 
1349     err = rocker_msix_init(r);
1350     if (err) {
1351         goto err_msix_init;
1352     }
1353 
1354     /* validate switch properties */
1355 
1356     if (!r->name) {
1357         r->name = g_strdup(ROCKER);
1358     }
1359 
1360     if (rocker_find(r->name)) {
1361         err = -EEXIST;
1362         goto err_duplicate;
1363     }
1364 
1365     /* The rocker name is passed to the OS in port name requests with the
1366      * intention that it be used in interface names. Limit the length of
1367      * the rocker name to avoid naming problems in the OS. The port number
1368      * is also appended as p# and the unganged breakout as b#, where # is
1369      * at most 2 digits, so leave room for those too (-1 for the string
1370      * terminator, -3 for p# and -3 for b#).
1371      */
1372 #define ROCKER_IFNAMSIZ 16
1373 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1374     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1375         fprintf(stderr,
1376                 "rocker: name too long; please shorten to at most %d chars\n",
1377                 MAX_ROCKER_NAME_LEN);
1378         return -EINVAL;
1379     }
1380 
1381     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1382         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1383         r->fp_start_macaddr.a[4] += (sw_index++);
1384     }
1385 
1386     if (!r->switch_id) {
1387         memcpy(&r->switch_id, &r->fp_start_macaddr,
1388                sizeof(r->fp_start_macaddr));
1389     }
1390 
1391     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1392         r->fp_ports = ROCKER_FP_PORTS_MAX;
1393     }
1394 
1395     r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1396     if (!r->rings) {
1397         goto err_rings_alloc;
1398     }
1399 
1400     /* Rings are ordered like this:
1401      * - command ring
1402      * - event ring
1403      * - port0 tx ring
1404      * - port0 rx ring
1405      * - port1 tx ring
1406      * - port1 rx ring
1407      * .....
1408      */
1409 
1410     err = -ENOMEM;
1411     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1412         DescRing *ring = desc_ring_alloc(r, i);
1413 
1414         if (!ring) {
1415             goto err_ring_alloc;
1416         }
1417 
1418         if (i == ROCKER_RING_CMD) {
1419             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1420         } else if (i == ROCKER_RING_EVENT) {
1421             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1422         } else if (i % 2 == 0) {
1423             desc_ring_set_consume(ring, tx_consume,
1424                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1425         } else if (i % 2 == 1) {
1426             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1427         }
1428 
1429         r->rings[i] = ring;
1430     }
1431 
1432     for (i = 0; i < r->fp_ports; i++) {
1433         FpPort *port =
1434             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1435                           i, &r->fp_ports_peers[i]);
1436 
1437         if (!port) {
1438             goto err_port_alloc;
1439         }
1440 
1441         r->fp_port[i] = port;
1442         fp_port_set_world(port, r->world_dflt);
1443     }
1444 
1445     QLIST_INSERT_HEAD(&rockers, r, next);
1446 
1447     return 0;
1448 
1449 err_port_alloc:
1450     for (--i; i >= 0; i--) {
1451         FpPort *port = r->fp_port[i];
1452         fp_port_free(port);
1453     }
1454     i = rocker_pci_ring_count(r);
1455 err_ring_alloc:
1456     for (--i; i >= 0; i--) {
1457         desc_ring_free(r->rings[i]);
1458     }
1459     g_free(r->rings);
1460 err_rings_alloc:
1461 err_duplicate:
1462     rocker_msix_uninit(r);
1463 err_msix_init:
1464     object_unparent(OBJECT(&r->msix_bar));
1465     object_unparent(OBJECT(&r->mmio));
1466 err_world_type_by_name:
1467 err_world_alloc:
1468     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1469         if (r->worlds[i]) {
1470             world_free(r->worlds[i]);
1471         }
1472     }
1473     return err;
1474 }
1475 
1476 static void pci_rocker_uninit(PCIDevice *dev)
1477 {
1478     Rocker *r = to_rocker(dev);
1479     int i;
1480 
1481     QLIST_REMOVE(r, next);
1482 
1483     for (i = 0; i < r->fp_ports; i++) {
1484         FpPort *port = r->fp_port[i];
1485 
1486         fp_port_free(port);
1487         r->fp_port[i] = NULL;
1488     }
1489 
1490     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1491         if (r->rings[i]) {
1492             desc_ring_free(r->rings[i]);
1493         }
1494     }
1495     g_free(r->rings);
1496 
1497     rocker_msix_uninit(r);
1498     object_unparent(OBJECT(&r->msix_bar));
1499     object_unparent(OBJECT(&r->mmio));
1500 
1501     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1502         if (r->worlds[i]) {
1503             world_free(r->worlds[i]);
1504         }
1505     }
1506     g_free(r->fp_ports_peers);
1507 }
1508 
1509 static void rocker_reset(DeviceState *dev)
1510 {
1511     Rocker *r = to_rocker(dev);
1512     int i;
1513 
1514     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1515         if (r->worlds[i]) {
1516             world_reset(r->worlds[i]);
1517         }
1518     }
1519     for (i = 0; i < r->fp_ports; i++) {
1520         fp_port_reset(r->fp_port[i]);
1521         fp_port_set_world(r->fp_port[i], r->world_dflt);
1522     }
1523 
1524     r->test_reg = 0;
1525     r->test_reg64 = 0;
1526     r->test_dma_addr = 0;
1527     r->test_dma_size = 0;
1528 
1529     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1530         desc_ring_reset(r->rings[i]);
1531     }
1532 
1533     DPRINTF("Reset done\n");
1534 }
1535 
1536 static Property rocker_properties[] = {
1537     DEFINE_PROP_STRING("name", Rocker, name),
1538     DEFINE_PROP_STRING("world", Rocker, world_name),
1539     DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1540                         fp_start_macaddr),
1541     DEFINE_PROP_UINT64("switch_id", Rocker,
1542                        switch_id, 0),
1543     DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1544                       fp_ports_peers, qdev_prop_netdev, NICPeers),
1545     DEFINE_PROP_END_OF_LIST(),
1546 };
1547 
1548 static const VMStateDescription rocker_vmsd = {
1549     .name = ROCKER,
1550     .unmigratable = 1,
1551 };
1552 
1553 static void rocker_class_init(ObjectClass *klass, void *data)
1554 {
1555     DeviceClass *dc = DEVICE_CLASS(klass);
1556     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1557 
1558     k->init = pci_rocker_init;
1559     k->exit = pci_rocker_uninit;
1560     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1561     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1562     k->revision = ROCKER_PCI_REVISION;
1563     k->class_id = PCI_CLASS_NETWORK_OTHER;
1564     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1565     dc->desc = "Rocker Switch";
1566     dc->reset = rocker_reset;
1567     dc->props = rocker_properties;
1568     dc->vmsd = &rocker_vmsd;
1569 }
1570 
1571 static const TypeInfo rocker_info = {
1572     .name          = ROCKER,
1573     .parent        = TYPE_PCI_DEVICE,
1574     .instance_size = sizeof(Rocker),
1575     .class_init    = rocker_class_init,
1576 };
1577 
1578 static void rocker_register_types(void)
1579 {
1580     type_register_static(&rocker_info);
1581 }
1582 
1583 type_init(rocker_register_types)
1584