xref: /freebsd/sys/net/paravirt.h (revision 2ff63af9)
14bf50f18SLuigi Rizzo /*
24bf50f18SLuigi Rizzo  * Copyright (C) 2013 Luigi Rizzo. All rights reserved.
34bf50f18SLuigi Rizzo  *
44bf50f18SLuigi Rizzo  * Redistribution and use in source and binary forms, with or without
54bf50f18SLuigi Rizzo  * modification, are permitted provided that the following conditions
64bf50f18SLuigi Rizzo  * are met:
74bf50f18SLuigi Rizzo  *   1. Redistributions of source code must retain the above copyright
84bf50f18SLuigi Rizzo  *      notice, this list of conditions and the following disclaimer.
94bf50f18SLuigi Rizzo  *   2. Redistributions in binary form must reproduce the above copyright
104bf50f18SLuigi Rizzo  *      notice, this list of conditions and the following disclaimer in the
114bf50f18SLuigi Rizzo  *    documentation and/or other materials provided with the distribution.
124bf50f18SLuigi Rizzo  *
134bf50f18SLuigi Rizzo  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
144bf50f18SLuigi Rizzo  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
154bf50f18SLuigi Rizzo  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
164bf50f18SLuigi Rizzo  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
174bf50f18SLuigi Rizzo  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
184bf50f18SLuigi Rizzo  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
194bf50f18SLuigi Rizzo  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
204bf50f18SLuigi Rizzo  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
214bf50f18SLuigi Rizzo  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
224bf50f18SLuigi Rizzo  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
234bf50f18SLuigi Rizzo  * SUCH DAMAGE.
244bf50f18SLuigi Rizzo  */
254bf50f18SLuigi Rizzo 
264bf50f18SLuigi Rizzo #ifndef NET_PARAVIRT_H
274bf50f18SLuigi Rizzo #define NET_PARAVIRT_H
284bf50f18SLuigi Rizzo 
294bf50f18SLuigi Rizzo  /*
304bf50f18SLuigi Rizzo   *
314bf50f18SLuigi Rizzo  Support for virtio-like communication between host (H) and guest (G) NICs.
324bf50f18SLuigi Rizzo 
334bf50f18SLuigi Rizzo  THIS IS EXPERIMENTAL CODE AND SUBJECT TO CHANGE.
344bf50f18SLuigi Rizzo 
 The guest allocates the shared Communication Status Block (csb) and
 writes its physical address to CSBAL and CSBAH (data is little endian).
 csb->guest_csb_on enables the mode. If disabled, the device acts as a
 regular one.
384bf50f18SLuigi Rizzo 
394bf50f18SLuigi Rizzo  Notifications for tx and rx are exchanged without vm exits
404bf50f18SLuigi Rizzo  if possible. In particular (only mentioning csb mode below),
414bf50f18SLuigi Rizzo  the following actions are performed. In the description below,
424bf50f18SLuigi Rizzo  "double check" means verifying again the condition that caused
434bf50f18SLuigi Rizzo  the previous action, and reverting the action if the condition has
444bf50f18SLuigi Rizzo  changed. The condition typically depends on a variable set by the
454bf50f18SLuigi Rizzo  other party, and the double check is done to avoid races. E.g.
464bf50f18SLuigi Rizzo 
474bf50f18SLuigi Rizzo 	// start with A=0
484bf50f18SLuigi Rizzo     again:
494bf50f18SLuigi Rizzo 	// do something
504bf50f18SLuigi Rizzo 	if ( cond(C) ) { // C is written by the other side
514bf50f18SLuigi Rizzo 	    A = 1;
524bf50f18SLuigi Rizzo 	    // barrier
534bf50f18SLuigi Rizzo 	    if ( !cond(C) ) {
544bf50f18SLuigi Rizzo 		A = 0;
554bf50f18SLuigi Rizzo 		goto again;
564bf50f18SLuigi Rizzo 	    }
574bf50f18SLuigi Rizzo 	}
584bf50f18SLuigi Rizzo 
594bf50f18SLuigi Rizzo  TX: start from idle:
604bf50f18SLuigi Rizzo     H starts with host_need_txkick=1 when the I/O thread bh is idle. Upon new
614bf50f18SLuigi Rizzo     transmissions, G always updates guest_tdt.  If host_need_txkick == 1,
624bf50f18SLuigi Rizzo     G also writes to the TDT, which acts as a kick to H (so pending
634bf50f18SLuigi Rizzo     writes are always dispatched to H as soon as possible.)
644bf50f18SLuigi Rizzo 
654bf50f18SLuigi Rizzo  TX: active state:
    On the kick (TDT write) H sets host_need_txkick = 0 (if not
    done already by G), and starts an I/O thread trying to consume
    packets from TDH to guest_tdt, periodically refreshing host_tdh
    and TDH.  When host_tdh == guest_tdt, H sets host_need_txkick=1,
    and then does the "double check" for race avoidance.
714bf50f18SLuigi Rizzo 
724bf50f18SLuigi Rizzo  TX: G runs out of buffers
734bf50f18SLuigi Rizzo     XXX there are two mechanisms, one boolean (using guest_need_txkick)
744bf50f18SLuigi Rizzo     and one with a threshold (using guest_txkick_at). They are mutually
754bf50f18SLuigi Rizzo     exclusive.
764bf50f18SLuigi Rizzo     BOOLEAN: when G has no space, it sets guest_need_txkick=1 and does
774bf50f18SLuigi Rizzo         the double check. If H finds guest_need_txkick== 1 on a write
784bf50f18SLuigi Rizzo         to TDH, it also generates an interrupt.
794bf50f18SLuigi Rizzo     THRESHOLD: G sets guest_txkick_at to the TDH value for which it
804bf50f18SLuigi Rizzo 	wants to receive an interrupt. When H detects that TDH moves
814bf50f18SLuigi Rizzo 	across guest_txkick_at, it generates an interrupt.
824bf50f18SLuigi Rizzo 	This second mechanism reduces the number of interrupts and
834bf50f18SLuigi Rizzo 	TDT writes on the transmit side when the host is too slow.
844bf50f18SLuigi Rizzo 
854bf50f18SLuigi Rizzo  RX: start from idle
864bf50f18SLuigi Rizzo     G starts with guest_need_rxkick = 1 when the receive ring is empty.
874bf50f18SLuigi Rizzo     As packets arrive, H updates host_rdh (and RDH) and also generates an
884bf50f18SLuigi Rizzo     interrupt when guest_need_rxkick == 1 (so incoming packets are
894bf50f18SLuigi Rizzo     always reported to G as soon as possible, apart from interrupt
904bf50f18SLuigi Rizzo     moderation delays). It also tracks guest_rdt for new buffers.
914bf50f18SLuigi Rizzo 
924bf50f18SLuigi Rizzo  RX: active state
    As the interrupt arrives, G sets guest_need_rxkick = 0 and starts
    draining packets from the receive ring, while updating guest_rdt.
    When G runs out of packets it sets guest_need_rxkick=1 and does the
    double check.
974bf50f18SLuigi Rizzo 
984bf50f18SLuigi Rizzo  RX: H runs out of buffers
    XXX there are two mechanisms, one boolean (using host_need_rxkick)
    and one with a threshold (using host_rxkick_at). They are mutually
    exclusive.
1024bf50f18SLuigi Rizzo     BOOLEAN: when H has no space, it sets host_need_rxkick=1 and does the
1034bf50f18SLuigi Rizzo 	double check. If G finds host_need_rxkick==1 on updating guest_rdt,
1044bf50f18SLuigi Rizzo         it also writes to RDT causing a kick to H.
    THRESHOLD: H sets host_rxkick_at to the RDT value for which it wants
	to receive a kick. When G detects that guest_rdt moves across
	host_rxkick_at, it writes to RDT, thus generating a kick.
	This second mechanism reduces the number of kicks and
	RDT writes on the receive side when the guest is too slow and
	would free only a few buffers at a time.
1114bf50f18SLuigi Rizzo 
1124bf50f18SLuigi Rizzo  */
/*
 * Communication Status Block (CSB) shared between guest (G) and host (H).
 *
 * Every member is a 32-bit word.  The member order is the layout that
 * both sides read and write, so members must not be reordered, resized
 * or removed without updating both sides together.
 */
struct paravirt_csb {
    /* XXX revise the layout to minimize cache bounces.
     * Usage is described as follows:
     *	[GH][RW][+-0]	guest/host reads/writes frequently/rarely/almost never
     */

    /* these are (mostly) written by the guest */
    uint32_t guest_tdt;            /* GW+ HR+ pkt to transmit */
    uint32_t guest_need_txkick;    /* GW- HR+ G ran out of tx bufs, request kick */
    uint32_t guest_need_rxkick;    /* GW- HR+ G ran out of rx pkts, request kick */
    uint32_t guest_csb_on;         /* GW- HR+ enable paravirtual mode */
    uint32_t guest_rdt;            /* GW+ HR+ rx buffers available */
    uint32_t guest_txkick_at;      /* GW- HR+ tx ring pos. where G expects an intr */
    uint32_t guest_use_msix;       /* GW0 HR0 guest uses MSI-X interrupts. */
    /*
     * Together with the 7 words above this rounds the guest section up to
     * 16 words (64 bytes).
     * NOTE(review): presumably so that the host-written words start on a
     * separate 64-byte cache line -- confirm.
     */
    uint32_t pad[9];

    /* these are (mostly) written by the host */
    uint32_t host_tdh;             /* GR0 HW- shadow register, mostly unused */
    uint32_t host_need_txkick;     /* GR+ HW- start the iothread */
    /*
     * Exception in this section: per its own tag, this word is written by
     * the guest and read by the host.
     */
    uint32_t host_txcycles_lim;    /* GW- HR- how much to spin before sleep.
				    * set by the guest */
    uint32_t host_txcycles;        /* GR0 HW- counter, but no need to be exported */
    uint32_t host_rdh;             /* GR0 HW- shadow register, mostly unused */
    uint32_t host_need_rxkick;     /* GR+ HW- flush rx queued packets */
    /* NOTE(review): the '*' frequency tag is not defined by the legend above. */
    uint32_t host_isr;             /* GR* HW* shadow copy of ISR */
    uint32_t host_rxkick_at;       /* GR+ HW- rx ring pos where H expects a kick */
    uint32_t vnet_ring_high;       /* Vnet ring physical address high. */
    uint32_t vnet_ring_low;        /* Vnet ring physical address low. */
};
1414bf50f18SLuigi Rizzo 
/*
 * Size reserved for the CSB.
 * NOTE(review): presumably in bytes (4096 = one 4 KiB page) -- confirm
 * against the code that allocates the block.
 */
#define NET_PARAVIRT_CSB_SIZE   4096
/*
 * All-ones 32-bit value.
 * NOTE(review): looks like a "no value" marker for a 32-bit CSB word --
 * confirm at the use sites.
 */
#define NET_PARAVIRT_NONE   (~((uint32_t)0))
1444bf50f18SLuigi Rizzo 
#ifdef	QEMU_PCI_H

/*
 * API functions only available within QEMU
 * (declared only when QEMU_PCI_H is defined).
 */

/*
 * Configure the CSB for a device.  Only the declaration is visible here.
 *
 * NOTE(review): parameter roles below are inferred from their names and
 * from the protocol description at the top of this header -- confirm
 * against the implementation:
 *   csb     in/out pointer to the device's CSB pointer
 *   csbbal  presumably the low 32 bits of the CSB physical address
 *   csbbah  presumably the high 32 bits of the CSB physical address
 *   tx_bh   presumably the transmit bottom half of the I/O thread
 *   as      address space handle
 */
void paravirt_configure_csb(struct paravirt_csb **csb, uint32_t csbbal,
			uint32_t csbbah, QEMUBH *tx_bh, AddressSpace *as);

#endif /* QEMU_PCI_H */
1554bf50f18SLuigi Rizzo 
1564bf50f18SLuigi Rizzo #endif /* NET_PARAVIRT_H */
157