/*
 * Copyright (C) 2013 Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef NET_PARAVIRT_H
#define NET_PARAVIRT_H

/*
 * Support for virtio-like communication between host (H) and guest (G) NICs.
 *
 * THIS IS EXPERIMENTAL CODE AND SUBJECT TO CHANGE.
 *
 * The guest allocates the shared Communication Status Block (csb) and
 * writes its physical address at CSBAL and CSBAH (data is little endian).
 * csb->guest_csb_on enables the mode. If disabled, the device acts as a
 * regular one.
 *
 * Notifications for tx and rx are exchanged without vm exits
 * if possible. In particular (only mentioning csb mode below),
 * the following actions are performed. In the description below,
 * "double check" means verifying again the condition that caused
 * the previous action, and reverting the action if the condition has
 * changed. The condition typically depends on a variable set by the
 * other party, and the double check is done to avoid races. E.g.
 *
 *      // start with A=0
 *    again:
 *      // do something
 *      if ( cond(C) ) { // C is written by the other side
 *          A = 1;
 *          // barrier
 *          if ( !cond(C) ) {
 *              A = 0;
 *              goto again;
 *          }
 *      }
 *
 * TX: start from idle:
 *    H starts with host_need_txkick=1 when the I/O thread bh is idle. Upon
 *    new transmissions, G always updates guest_tdt. If host_need_txkick == 1,
 *    G also writes to the TDT, which acts as a kick to H (so pending
 *    writes are always dispatched to H as soon as possible.)
 *
 * TX: active state:
 *    On the kick (TDT write) H sets host_need_txkick=0 (if not
 *    done already by G), and starts an I/O thread trying to consume
 *    packets from TDH to guest_tdt, periodically refreshing host_tdh
 *    and TDH. When host_tdh == guest_tdt, H sets host_need_txkick=1,
 *    and then does the "double check" for race avoidance.
 *
 * TX: G runs out of buffers
 *    XXX there are two mechanisms, one boolean (using guest_need_txkick)
 *    and one with a threshold (using guest_txkick_at). They are mutually
 *    exclusive.
 *    BOOLEAN: when G has no space, it sets guest_need_txkick=1 and does
 *        the double check. If H finds guest_need_txkick == 1 on a write
 *        to TDH, it also generates an interrupt.
 *    THRESHOLD: G sets guest_txkick_at to the TDH value for which it
 *        wants to receive an interrupt. When H detects that TDH moves
 *        across guest_txkick_at, it generates an interrupt.
 *        This second mechanism reduces the number of interrupts and
 *        TDT writes on the transmit side when the host is too slow.
 *
 * RX: start from idle
 *    G starts with guest_need_rxkick = 1 when the receive ring is empty.
 *    As packets arrive, H updates host_rdh (and RDH) and also generates an
 *    interrupt when guest_need_rxkick == 1 (so incoming packets are
 *    always reported to G as soon as possible, apart from interrupt
 *    moderation delays). It also tracks guest_rdt for new buffers.
 *
 * RX: active state
 *    As the interrupt arrives, G sets guest_need_rxkick = 0 and starts
 *    draining packets from the receive ring, while updating guest_rdt.
 *    When G runs out of packets it sets guest_need_rxkick=1 and does the
 *    double check.
 *
 * RX: H runs out of buffers
 *    XXX there are two mechanisms, one boolean (using host_need_rxkick)
 *    and one with a threshold (using host_rxkick_at). They are mutually
 *    exclusive.
 *    BOOLEAN: when H has no space, it sets host_need_rxkick=1 and does the
 *        double check. If G finds host_need_rxkick == 1 on updating
 *        guest_rdt, it also writes to RDT causing a kick to H.
 *    THRESHOLD: H sets host_rxkick_at to the RDT value for which it wants
 *        to receive a kick. When G detects that guest_rdt moves across
 *        host_rxkick_at, it writes to RDT, thus generating a kick.
 *        This second mechanism reduces the number of kicks and
 *        RDT writes on the receive side when the guest is too slow and
 *        would free only a few buffers at a time.
 */

/*
 * Communication Status Block shared between guest and host.
 *
 * Layout: every member is a uint32_t. The guest-written area is 7 words
 * followed by pad[9], i.e. 16 words (64 bytes), so the host-written area
 * starts at byte offset 64. The host-written area is 10 words, for a total
 * of 26 words (104 bytes).
 * NOTE(review): the 64-byte split presumably keeps the two areas on
 * separate cache lines -- confirm against the target platforms.
 *
 * Both sides depend on this layout: do not reorder or resize members.
 */
struct paravirt_csb {
	/* XXX revise the layout to minimize cache bounces.
	 * Usage is described as follows:
	 *	[GH][RW][+-0]	guest/host reads/writes frequently/rarely/almost never
	 */

	/* these are (mostly) written by the guest */
	uint32_t guest_tdt;		/* GW+ HR+ pkt to transmit */
	uint32_t guest_need_txkick;	/* GW- HR+ G ran out of tx bufs, request kick */
	uint32_t guest_need_rxkick;	/* GW- HR+ G ran out of rx pkts, request kick */
	uint32_t guest_csb_on;		/* GW- HR+ enable paravirtual mode */
	uint32_t guest_rdt;		/* GW+ HR+ rx buffers available */
	uint32_t guest_txkick_at;	/* GW- HR+ tx ring pos. where G expects an intr */
	uint32_t guest_use_msix;	/* GW0 HR0 guest uses MSI-X interrupts. */
	uint32_t pad[9];		/* fills the guest area up to 16 words */

	/* these are (mostly) written by the host */
	uint32_t host_tdh;		/* GR0 HW- shadow register, mostly unused */
	uint32_t host_need_txkick;	/* GR+ HW- start the iothread */
	uint32_t host_txcycles_lim;	/* GW- HR- how much to spin before sleep.
					 * set by the guest */
	uint32_t host_txcycles;		/* GR0 HW- counter, but no need to be exported */
	uint32_t host_rdh;		/* GR0 HW- shadow register, mostly unused */
	uint32_t host_need_rxkick;	/* GR+ HW- flush rx queued packets */
	uint32_t host_isr;		/* GR* HW* shadow copy of ISR */
	uint32_t host_rxkick_at;	/* GR+ HW- rx ring pos where H expects a kick */
	uint32_t vnet_ring_high;	/* Vnet ring physical address high. */
	uint32_t vnet_ring_low;		/* Vnet ring physical address low. */
};

/*
 * NOTE(review): presumably the size in bytes of the memory area reserved
 * for the CSB; it is larger than sizeof(struct paravirt_csb) -- confirm
 * against the code that allocates/maps the block.
 */
#define NET_PARAVIRT_CSB_SIZE	4096

/*
 * All-ones 32-bit value (0xffffffff).
 * NOTE(review): presumably a "no value" sentinel -- confirm against users.
 */
#define NET_PARAVIRT_NONE	(~((uint32_t)0))

#ifdef QEMU_PCI_H

/*
 * API functions only available within QEMU
 */

/*
 * Configure the CSB on the host side (implemented elsewhere).
 *
 * csb:		in/out pointer to the host's CSB pointer.
 * csbbal:	NOTE(review): presumably the low 32 bits of the CSB guest
 *		physical address (the value written to CSBAL) -- confirm.
 * csbbah:	NOTE(review): presumably the high 32 bits of the same
 *		address (the value written to CSBAH) -- confirm.
 * tx_bh:	QEMU bottom half; presumably the tx I/O bottom half ("bh")
 *		mentioned in the protocol description -- confirm.
 * as:		QEMU address space; presumably the one used to reach the
 *		guest memory holding the CSB -- confirm.
 */
void paravirt_configure_csb(struct paravirt_csb **csb, uint32_t csbbal,
			uint32_t csbbah, QEMUBH *tx_bh, AddressSpace *as);

#endif /* QEMU_PCI_H */

#endif /* NET_PARAVIRT_H */