/*
 * Xen 9p backend
 *
 * Copyright Aporeto 2017
 *
 * Authors:
 *  Stefano Stabellini <stefano@aporeto.com>
 *
 */

#include "qemu/osdep.h"

#include "hw/9pfs/9p.h"
#include "hw/xen/xen-legacy-backend.h"
#include "hw/9pfs/xen-9pfs.h"
#include "qapi/error.h"
#include "qemu/config-file.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "fsdev/qemu-fsdev.h"

#define VERSIONS "1"
#define MAX_RINGS 8
#define MAX_RING_ORDER 8

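/*
 * Per-ring state. The frontend can offer multiple rings; each ring has
 * its own grant mappings, event channel and bottom half, and carries at
 * most one in-flight request at a time.
 */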
typedef struct Xen9pfsRing {
    struct Xen9pfsDev *priv;

    int ref;
    xenevtchn_handle   *evtchndev;
    int evtchn;
    int local_port;
    int ring_order;
    struct xen_9pfs_data_intf *intf;
    unsigned char *data;
    struct xen_9pfs_data ring;

    struct iovec *sg;
    QEMUBH *bh;
    Coroutine *co;

    /* local copies, so that we can read/write PDU data directly from
     * the ring */
    RING_IDX out_cons, out_size, in_cons;
    bool inprogress;
} Xen9pfsRing;

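/* Per-device state: the generic 9p server state plus the shared rings */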
typedef struct Xen9pfsDev {
    struct XenLegacyDevice xendev;  /* must be first */
    V9fsState state;
    char *path;
    char *security_model;
    char *tag;
    char *id;

    int num_rings;
    Xen9pfsRing *rings;
} Xen9pfsDev;

static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev);

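/*
 * Fill in_sg with up to two iovecs covering the free space of the "in"
 * (backend-to-frontend) ring, wrapping around the end of the ring
 * buffer if necessary.
 */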
static void xen_9pfs_in_sg(Xen9pfsRing *ring,
                           struct iovec *in_sg,
                           int *num,
                           uint32_t idx,
                           uint32_t size)
{
    RING_IDX cons, prod, masked_prod, masked_cons;

    cons = ring->intf->in_cons;
    prod = ring->intf->in_prod;
    xen_rmb();
    masked_prod = xen_9pfs_mask(prod, XEN_FLEX_RING_SIZE(ring->ring_order));
    masked_cons = xen_9pfs_mask(cons, XEN_FLEX_RING_SIZE(ring->ring_order));

    if (masked_prod < masked_cons) {
        in_sg[0].iov_base = ring->ring.in + masked_prod;
        in_sg[0].iov_len = masked_cons - masked_prod;
        *num = 1;
    } else {
        in_sg[0].iov_base = ring->ring.in + masked_prod;
        in_sg[0].iov_len = XEN_FLEX_RING_SIZE(ring->ring_order) - masked_prod;
        in_sg[1].iov_base = ring->ring.in;
        in_sg[1].iov_len = masked_cons;
        *num = 2;
    }
}

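/*
 * Fill out_sg with up to two iovecs covering the ring->out_size bytes
 * of request data pending on the "out" (frontend-to-backend) ring,
 * starting at the consumer index and wrapping around if necessary.
 */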
static void xen_9pfs_out_sg(Xen9pfsRing *ring,
                            struct iovec *out_sg,
                            int *num,
                            uint32_t idx)
{
    RING_IDX cons, prod, masked_prod, masked_cons;

    cons = ring->intf->out_cons;
    prod = ring->intf->out_prod;
    xen_rmb();
    masked_prod = xen_9pfs_mask(prod, XEN_FLEX_RING_SIZE(ring->ring_order));
    masked_cons = xen_9pfs_mask(cons, XEN_FLEX_RING_SIZE(ring->ring_order));

    if (masked_cons < masked_prod) {
        out_sg[0].iov_base = ring->ring.out + masked_cons;
        out_sg[0].iov_len = ring->out_size;
        *num = 1;
    } else {
        if (ring->out_size >
            (XEN_FLEX_RING_SIZE(ring->ring_order) - masked_cons)) {
            out_sg[0].iov_base = ring->ring.out + masked_cons;
            out_sg[0].iov_len = XEN_FLEX_RING_SIZE(ring->ring_order) -
                                masked_cons;
            out_sg[1].iov_base = ring->ring.out;
            out_sg[1].iov_len = ring->out_size -
                                (XEN_FLEX_RING_SIZE(ring->ring_order) -
                                 masked_cons);
            *num = 2;
        } else {
            out_sg[0].iov_base = ring->ring.out + masked_cons;
            out_sg[0].iov_len = ring->out_size;
            *num = 1;
        }
    }
}

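/*
 * Marshal a reply directly into the ring: build the scatter-gather
 * list for the free space of the "in" ring and encode the PDU there.
 * On failure, move the device to Closing and tear the connection down.
 */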
static ssize_t xen_9pfs_pdu_vmarshal(V9fsPDU *pdu,
                                     size_t offset,
                                     const char *fmt,
                                     va_list ap)
{
    Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
    struct iovec in_sg[2];
    int num;
    ssize_t ret;

    xen_9pfs_in_sg(&xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings],
                   in_sg, &num, pdu->idx, ROUND_UP(offset + 128, 512));

    ret = v9fs_iov_vmarshal(in_sg, num, offset, 0, fmt, ap);
    if (ret < 0) {
        xen_pv_printf(&xen_9pfs->xendev, 0,
                      "Failed to encode VirtFS request type %d\n", pdu->id + 1);
        xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing);
        xen_9pfs_disconnect(&xen_9pfs->xendev);
    }
    return ret;
}

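/*
 * Unmarshal request data directly from the ring: build the
 * scatter-gather list for the pending "out" data and decode the PDU
 * from it. On failure, move the device to Closing and tear the
 * connection down.
 */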
static ssize_t xen_9pfs_pdu_vunmarshal(V9fsPDU *pdu,
                                       size_t offset,
                                       const char *fmt,
                                       va_list ap)
{
    Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
    struct iovec out_sg[2];
    int num;
    ssize_t ret;

    xen_9pfs_out_sg(&xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings],
                    out_sg, &num, pdu->idx);

    ret = v9fs_iov_vunmarshal(out_sg, num, offset, 0, fmt, ap);
    if (ret < 0) {
        xen_pv_printf(&xen_9pfs->xendev, 0,
                      "Failed to decode VirtFS request type %d\n", pdu->id);
        xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing);
        xen_9pfs_disconnect(&xen_9pfs->xendev);
    }
    return ret;
}

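/* Expose the pending request bytes on the "out" ring as an iovec array */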
static void xen_9pfs_init_out_iov_from_pdu(V9fsPDU *pdu,
                                           struct iovec **piov,
                                           unsigned int *pniov,
                                           size_t size)
{
    Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
    Xen9pfsRing *ring = &xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings];
    int num;

    g_free(ring->sg);

    ring->sg = g_new0(struct iovec, 2);
    xen_9pfs_out_sg(ring, ring->sg, &num, pdu->idx);
    *piov = ring->sg;
    *pniov = num;
}

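/*
 * Expose the free space on the "in" ring as an iovec array. If there is
 * not enough room for the reply yet, yield back to the main loop; the
 * bottom half re-enters the coroutine once the frontend has consumed
 * more data.
 */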
static void xen_9pfs_init_in_iov_from_pdu(V9fsPDU *pdu,
                                          struct iovec **piov,
                                          unsigned int *pniov,
                                          size_t size)
{
    Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
    Xen9pfsRing *ring = &xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings];
    int num;
    size_t buf_size;

    g_free(ring->sg);

    ring->sg = g_new0(struct iovec, 2);
    ring->co = qemu_coroutine_self();
    /* make sure other threads see ring->co changes before continuing */
    smp_wmb();

again:
    xen_9pfs_in_sg(ring, ring->sg, &num, pdu->idx, size);
    buf_size = iov_size(ring->sg, num);
    if (buf_size < size) {
        qemu_coroutine_yield();
        goto again;
    }
    ring->co = NULL;
    /* make sure other threads see ring->co changes before continuing */
    smp_wmb();

    *piov = ring->sg;
    *pniov = num;
}

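/*
 * Complete a request: consume the request bytes from the "out" ring,
 * publish the reply on the "in" ring, notify the frontend via the event
 * channel, and reschedule the bottom half to pick up any next request.
 */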
static void xen_9pfs_push_and_notify(V9fsPDU *pdu)
{
    RING_IDX prod;
    Xen9pfsDev *priv = container_of(pdu->s, Xen9pfsDev, state);
    Xen9pfsRing *ring = &priv->rings[pdu->tag % priv->num_rings];

    g_free(ring->sg);
    ring->sg = NULL;

    ring->intf->out_cons = ring->out_cons;
    xen_wmb();

    prod = ring->intf->in_prod;
    xen_rmb();
    ring->intf->in_prod = prod + pdu->size;
    xen_wmb();

    ring->inprogress = false;
    xenevtchn_notify(ring->evtchndev, ring->local_port);

    qemu_bh_schedule(ring->bh);
}

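/* Transport hooks through which the generic 9p server drives this backend */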
static const V9fsTransport xen_9p_transport = {
    .pdu_vmarshal = xen_9pfs_pdu_vmarshal,
    .pdu_vunmarshal = xen_9pfs_pdu_vunmarshal,
    .init_in_iov_from_pdu = xen_9pfs_init_in_iov_from_pdu,
    .init_out_iov_from_pdu = xen_9pfs_init_out_iov_from_pdu,
    .push_and_notify = xen_9pfs_push_and_notify,
};

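/* Nothing to do at device init; all setup happens in xen_9pfs_connect() */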
static int xen_9pfs_init(struct XenLegacyDevice *xendev)
{
    return 0;
}

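/*
 * Check the "out" ring for a complete request; if one is fully queued,
 * allocate a PDU for it and submit it to the 9p server.
 */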
static int xen_9pfs_receive(Xen9pfsRing *ring)
{
    P9MsgHeader h;
    RING_IDX cons, prod, masked_prod, masked_cons, queued;
    V9fsPDU *pdu;

    if (ring->inprogress) {
        return 0;
    }

    cons = ring->intf->out_cons;
    prod = ring->intf->out_prod;
    xen_rmb();

    queued = xen_9pfs_queued(prod, cons, XEN_FLEX_RING_SIZE(ring->ring_order));
    if (queued < sizeof(h)) {
        return 0;
    }
    ring->inprogress = true;

    masked_prod = xen_9pfs_mask(prod, XEN_FLEX_RING_SIZE(ring->ring_order));
    masked_cons = xen_9pfs_mask(cons, XEN_FLEX_RING_SIZE(ring->ring_order));

    xen_9pfs_read_packet((uint8_t *) &h, ring->ring.out, sizeof(h),
                         masked_prod, &masked_cons,
                         XEN_FLEX_RING_SIZE(ring->ring_order));
    if (queued < le32_to_cpu(h.size_le)) {
        return 0;
    }

    /* cannot fail, because we only handle one request per ring at a time */
    pdu = pdu_alloc(&ring->priv->state);
    ring->out_size = le32_to_cpu(h.size_le);
    ring->out_cons = cons + le32_to_cpu(h.size_le);

    pdu_submit(pdu, &h);

    return 0;
}

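/*
 * Bottom half: if a reply coroutine is waiting for ring space, spin
 * until it can be safely re-entered, re-enter it, then check the ring
 * for new requests.
 */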
static void xen_9pfs_bh(void *opaque)
{
    Xen9pfsRing *ring = opaque;
    bool wait;

again:
    wait = ring->co != NULL && qemu_coroutine_entered(ring->co);
    /* paired with the smp_wmb barriers in xen_9pfs_init_in_iov_from_pdu */
    smp_rmb();
    if (wait) {
        cpu_relax();
        goto again;
    }

    if (ring->co != NULL) {
        qemu_coroutine_enter_if_inactive(ring->co);
    }
    xen_9pfs_receive(ring);
}

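/*
 * The frontend kicked the event channel: clear the pending event and
 * defer the actual work to the bottom half.
 */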
static void xen_9pfs_evtchn_event(void *opaque)
{
    Xen9pfsRing *ring = opaque;
    evtchn_port_t port;

    port = xenevtchn_pending(ring->evtchndev);
    xenevtchn_unmask(ring->evtchndev, port);

    qemu_bh_schedule(ring->bh);
}

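/*
 * Tear down the event channels. Grant mappings and the rest of the
 * per-ring state are released later in xen_9pfs_free().
 */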
static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev)
{
    Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
    int i;

    for (i = 0; i < xen_9pdev->num_rings; i++) {
        if (xen_9pdev->rings[i].evtchndev != NULL) {
            qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
                    NULL, NULL, NULL);
            xenevtchn_unbind(xen_9pdev->rings[i].evtchndev,
                             xen_9pdev->rings[i].local_port);
            xen_9pdev->rings[i].evtchndev = NULL;
        }
    }
}

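/* Release all per-device resources; disconnect first if still connected */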
static int xen_9pfs_free(struct XenLegacyDevice *xendev)
{
    Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
    int i;

    if (xen_9pdev->rings[0].evtchndev != NULL) {
        xen_9pfs_disconnect(xendev);
    }

    for (i = 0; i < xen_9pdev->num_rings; i++) {
        if (xen_9pdev->rings[i].data != NULL) {
            xen_be_unmap_grant_refs(&xen_9pdev->xendev,
                                    xen_9pdev->rings[i].data,
                                    (1 << xen_9pdev->rings[i].ring_order));
        }
        if (xen_9pdev->rings[i].intf != NULL) {
            xen_be_unmap_grant_refs(&xen_9pdev->xendev,
                                    xen_9pdev->rings[i].intf,
                                    1);
        }
        if (xen_9pdev->rings[i].bh != NULL) {
            qemu_bh_delete(xen_9pdev->rings[i].bh);
        }
    }

    g_free(xen_9pdev->id);
    g_free(xen_9pdev->tag);
    g_free(xen_9pdev->path);
    g_free(xen_9pdev->security_model);
    g_free(xen_9pdev->rings);
    return 0;
}

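/*
 * The frontend is ready: read the ring layout from xenstore, map the
 * grant pages and bind the event channels, then create the fsdev
 * backend and realize the 9p device.
 */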
static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
{
    Error *err = NULL;
    int i;
    Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
    V9fsState *s = &xen_9pdev->state;
    QemuOpts *fsdev;

    if (xenstore_read_fe_int(&xen_9pdev->xendev, "num-rings",
                             &xen_9pdev->num_rings) == -1 ||
        xen_9pdev->num_rings > MAX_RINGS || xen_9pdev->num_rings < 1) {
        return -1;
    }

    xen_9pdev->rings = g_new0(Xen9pfsRing, xen_9pdev->num_rings);
    for (i = 0; i < xen_9pdev->num_rings; i++) {
        char *str;
        int ring_order;

        xen_9pdev->rings[i].priv = xen_9pdev;
        xen_9pdev->rings[i].evtchn = -1;
        xen_9pdev->rings[i].local_port = -1;

        str = g_strdup_printf("ring-ref%u", i);
        if (xenstore_read_fe_int(&xen_9pdev->xendev, str,
                                 &xen_9pdev->rings[i].ref) == -1) {
            g_free(str);
            goto out;
        }
        g_free(str);
        str = g_strdup_printf("event-channel-%u", i);
        if (xenstore_read_fe_int(&xen_9pdev->xendev, str,
                                 &xen_9pdev->rings[i].evtchn) == -1) {
            g_free(str);
            goto out;
        }
        g_free(str);

        xen_9pdev->rings[i].intf =
            xen_be_map_grant_ref(&xen_9pdev->xendev,
                                 xen_9pdev->rings[i].ref,
                                 PROT_READ | PROT_WRITE);
        if (!xen_9pdev->rings[i].intf) {
            goto out;
        }
        ring_order = xen_9pdev->rings[i].intf->ring_order;
        if (ring_order > MAX_RING_ORDER) {
            goto out;
        }
        xen_9pdev->rings[i].ring_order = ring_order;
        xen_9pdev->rings[i].data =
            xen_be_map_grant_refs(&xen_9pdev->xendev,
                                  xen_9pdev->rings[i].intf->ref,
                                  (1 << ring_order),
                                  PROT_READ | PROT_WRITE);
        if (!xen_9pdev->rings[i].data) {
            goto out;
        }
        xen_9pdev->rings[i].ring.in = xen_9pdev->rings[i].data;
        xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data +
                                       XEN_FLEX_RING_SIZE(ring_order);

        xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]);
        xen_9pdev->rings[i].out_cons = 0;
        xen_9pdev->rings[i].out_size = 0;
        xen_9pdev->rings[i].inprogress = false;

        xen_9pdev->rings[i].evtchndev = xenevtchn_open(NULL, 0);
        if (xen_9pdev->rings[i].evtchndev == NULL) {
            goto out;
        }
        qemu_set_cloexec(xenevtchn_fd(xen_9pdev->rings[i].evtchndev));
        xen_9pdev->rings[i].local_port = xenevtchn_bind_interdomain
                                            (xen_9pdev->rings[i].evtchndev,
                                             xendev->dom,
                                             xen_9pdev->rings[i].evtchn);
        if (xen_9pdev->rings[i].local_port == -1) {
            xen_pv_printf(xendev, 0,
                          "xenevtchn_bind_interdomain failed port=%d\n",
                          xen_9pdev->rings[i].evtchn);
            goto out;
        }
        xen_pv_printf(xendev, 2, "bind evtchn port %d\n",
                      xen_9pdev->rings[i].local_port);
        qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
                xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
    }

    xen_9pdev->security_model = xenstore_read_be_str(xendev, "security_model");
    xen_9pdev->path = xenstore_read_be_str(xendev, "path");
    xen_9pdev->id = s->fsconf.fsdev_id =
        g_strdup_printf("xen9p%d", xendev->dev);
    xen_9pdev->tag = s->fsconf.tag = xenstore_read_fe_str(xendev, "tag");
    fsdev = qemu_opts_create(qemu_find_opts("fsdev"),
            s->fsconf.tag,
            1, NULL);
    qemu_opt_set(fsdev, "fsdriver", "local", NULL);
    qemu_opt_set(fsdev, "path", xen_9pdev->path, NULL);
    qemu_opt_set(fsdev, "security_model", xen_9pdev->security_model, NULL);
    qemu_opts_set_id(fsdev, s->fsconf.fsdev_id);
    qemu_fsdev_add(fsdev, &err);
    if (err) {
        error_report_err(err);
    }
    v9fs_device_realize_common(s, &xen_9p_transport, NULL);

    return 0;

out:
    xen_9pfs_free(xendev);
    return -1;
}

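/*
 * Advertise the supported protocol versions, ring count and ring size
 * to the frontend via xenstore.
 */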
static void xen_9pfs_alloc(struct XenLegacyDevice *xendev)
{
    xenstore_write_be_str(xendev, "versions", VERSIONS);
    xenstore_write_be_int(xendev, "max-rings", MAX_RINGS);
    xenstore_write_be_int(xendev, "max-ring-page-order", MAX_RING_ORDER);
}

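/* Entry points registered with the legacy Xen backend framework */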
struct XenDevOps xen_9pfs_ops = {
    .size       = sizeof(Xen9pfsDev),
    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
    .alloc      = xen_9pfs_alloc,
    .init       = xen_9pfs_init,
    .initialise = xen_9pfs_connect,
    .disconnect = xen_9pfs_disconnect,
    .free       = xen_9pfs_free,
};