1 /*
2 * Xen 9p backend
3 *
4 * Copyright Aporeto 2017
5 *
6 * Authors:
7 * Stefano Stabellini <stefano@aporeto.com>
8 *
9 */
10
11 /*
12 * Not so fast! You might want to read the 9p developer docs first:
13 * https://wiki.qemu.org/Documentation/9p
14 */
15
16 #include "qemu/osdep.h"
17
18 #include "hw/9pfs/9p.h"
19 #include "hw/xen/xen-legacy-backend.h"
20 #include "hw/9pfs/xen-9pfs.h"
21 #include "qapi/error.h"
22 #include "qemu/config-file.h"
23 #include "qemu/main-loop.h"
24 #include "qemu/option.h"
25 #include "qemu/iov.h"
26 #include "fsdev/qemu-fsdev.h"
27
28 #include "trace.h"
29
/* Value written to the backend "versions" xenstore key. */
#define VERSIONS "1"
/* Upper bound accepted for "num-rings"; written to the "max-rings" key. */
#define MAX_RINGS 8
/* Upper bound accepted for a ring's order; written to "max-ring-page-order". */
#define MAX_RING_ORDER 9
33
/*
 * State of a single ring shared with the frontend. One instance exists for
 * each of the rings set up in xen_9pfs_connect().
 */
typedef struct Xen9pfsRing {
    struct Xen9pfsDev *priv; /* owning device, set in xen_9pfs_connect() */

    int ref; /* grant reference read from the "ring-ref%u" key */
    xenevtchn_handle *evtchndev; /* event channel handle of this ring */
    int evtchn; /* frontend port read from the "event-channel-%u" key */
    int local_port; /* port returned by the interdomain bind, -1 if unbound */
    int ring_order; /* copy of intf->ring_order, checked against MAX_RING_ORDER */
    struct xen_9pfs_data_intf *intf; /* mapping of the grant reference in ref */
    unsigned char *data; /* mapping of the grant references in intf->ref */
    struct xen_9pfs_data ring; /* in/out pointers into data */

    struct iovec *sg; /* iovec array handed out for the current PDU */
    QEMUBH *bh; /* bottom half running xen_9pfs_bh() for this ring */
    Coroutine *co; /* coroutine waiting for room in the "in" ring, or NULL */

    /* local copies, so that we can read/write PDU data directly from
     * the ring */
    RING_IDX out_cons, out_size, in_cons;
    bool inprogress; /* set while a request taken from this ring is pending */
} Xen9pfsRing;
55
/*
 * Per-device state of the Xen 9pfs backend.
 */
typedef struct Xen9pfsDev {
    struct XenLegacyDevice xendev; /* must be first */
    V9fsState state; /* generic 9p state; PDUs reach the device via pdu->s */
    char *path; /* backend xenstore "path" value, used as the fsdev path */
    char *security_model; /* backend xenstore "security_model" value */
    char *tag; /* frontend xenstore "tag" value, shared with fsconf.tag */
    char *id; /* "xen9p%d" built from xendev->dev, shared with fsconf.fsdev_id */

    int num_rings; /* frontend "num-rings" value, between 1 and MAX_RINGS */
    Xen9pfsRing *rings; /* array of num_rings entries allocated on connect */
    MemReentrancyGuard mem_reentrancy_guard; /* passed to qemu_bh_new_guarded() */
} Xen9pfsDev;
68
/* Declared early: the marshal/unmarshal error paths below call it. */
static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev);
70
/*
 * Fill @in_sg with the region of the "in" ring that lies between the
 * producer and the consumer index, i.e. the space available for replies.
 *
 * @ring:  ring to inspect
 * @in_sg: array of at least two entries to fill
 * @num:   set to the number of entries written (1 or 2)
 * @idx:   unused
 * @size:  unused
 *
 * When the region wraps around the end of the ring (this includes the case
 * where both masked indexes are equal) two entries are produced.
 */
static void xen_9pfs_in_sg(Xen9pfsRing *ring,
                           struct iovec *in_sg,
                           int *num,
                           uint32_t idx,
                           uint32_t size)
{
    const size_t ring_size = XEN_FLEX_RING_SIZE(ring->ring_order);
    RING_IDX cons_idx, prod_idx, prod_off, cons_off;

    cons_idx = ring->intf->in_cons;
    prod_idx = ring->intf->in_prod;
    xen_rmb();
    prod_off = xen_9pfs_mask(prod_idx, ring_size);
    cons_off = xen_9pfs_mask(cons_idx, ring_size);

    /* The first chunk always starts at the producer offset. */
    in_sg[0].iov_base = ring->ring.in + prod_off;

    if (prod_off < cons_off) {
        /* Contiguous region up to the consumer offset. */
        in_sg[0].iov_len = cons_off - prod_off;
        *num = 1;
        return;
    }

    /* Region runs to the end of the ring and continues from its start. */
    in_sg[0].iov_len = ring_size - prod_off;
    in_sg[1].iov_base = ring->ring.in;
    in_sg[1].iov_len = cons_off;
    *num = 2;
}
97
/*
 * Fill @out_sg with the ring->out_size bytes of the "out" ring that start
 * at the consumer index, i.e. the request currently being processed.
 *
 * @ring:   ring to inspect
 * @out_sg: array of at least two entries to fill
 * @num:    set to the number of entries written (1 or 2)
 * @idx:    unused
 */
static void xen_9pfs_out_sg(Xen9pfsRing *ring,
                            struct iovec *out_sg,
                            int *num,
                            uint32_t idx)
{
    const size_t ring_size = XEN_FLEX_RING_SIZE(ring->ring_order);
    RING_IDX cons_idx, prod_idx, prod_off, cons_off;
    size_t tail;

    cons_idx = ring->intf->out_cons;
    prod_idx = ring->intf->out_prod;
    xen_rmb();
    prod_off = xen_9pfs_mask(prod_idx, ring_size);
    cons_off = xen_9pfs_mask(cons_idx, ring_size);

    /* Bytes between the consumer offset and the end of the ring. */
    tail = ring_size - cons_off;

    out_sg[0].iov_base = ring->ring.out + cons_off;

    if (cons_off < prod_off || ring->out_size <= tail) {
        /* The request is described by a single chunk. */
        out_sg[0].iov_len = ring->out_size;
        *num = 1;
        return;
    }

    /* The request wraps: finish the tail, then continue at the start. */
    out_sg[0].iov_len = tail;
    out_sg[1].iov_base = ring->ring.out;
    out_sg[1].iov_len = ring->out_size - tail;
    *num = 2;
}
133
xen_9pfs_pdu_vmarshal(V9fsPDU * pdu,size_t offset,const char * fmt,va_list ap)134 static ssize_t xen_9pfs_pdu_vmarshal(V9fsPDU *pdu,
135 size_t offset,
136 const char *fmt,
137 va_list ap)
138 {
139 Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
140 struct iovec in_sg[2];
141 int num;
142 ssize_t ret;
143
144 xen_9pfs_in_sg(&xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings],
145 in_sg, &num, pdu->idx, ROUND_UP(offset + 128, 512));
146
147 ret = v9fs_iov_vmarshal(in_sg, num, offset, 0, fmt, ap);
148 if (ret < 0) {
149 xen_pv_printf(&xen_9pfs->xendev, 0,
150 "Failed to encode VirtFS reply type %d\n",
151 pdu->id + 1);
152 xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing);
153 xen_9pfs_disconnect(&xen_9pfs->xendev);
154 }
155 return ret;
156 }
157
xen_9pfs_pdu_vunmarshal(V9fsPDU * pdu,size_t offset,const char * fmt,va_list ap)158 static ssize_t xen_9pfs_pdu_vunmarshal(V9fsPDU *pdu,
159 size_t offset,
160 const char *fmt,
161 va_list ap)
162 {
163 Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
164 struct iovec out_sg[2];
165 int num;
166 ssize_t ret;
167
168 xen_9pfs_out_sg(&xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings],
169 out_sg, &num, pdu->idx);
170
171 ret = v9fs_iov_vunmarshal(out_sg, num, offset, 0, fmt, ap);
172 if (ret < 0) {
173 xen_pv_printf(&xen_9pfs->xendev, 0,
174 "Failed to decode VirtFS request type %d\n", pdu->id);
175 xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing);
176 xen_9pfs_disconnect(&xen_9pfs->xendev);
177 }
178 return ret;
179 }
180
/*
 * Transport hook: hand out an iovec array describing the request data of
 * @pdu in the "out" ring.
 *
 * @piov:  receives the array, which is stored in ring->sg
 * @pniov: receives the number of valid entries
 * @size:  unused
 *
 * Any array previously stored in ring->sg is freed first.
 */
static void xen_9pfs_init_out_iov_from_pdu(V9fsPDU *pdu,
                                           struct iovec **piov,
                                           unsigned int *pniov,
                                           size_t size)
{
    Xen9pfsDev *dev = container_of(pdu->s, Xen9pfsDev, state);
    Xen9pfsRing *ring = &dev->rings[pdu->tag % dev->num_rings];
    int niov;

    /* Replace whatever scatter-gather list was handed out before. */
    g_free(ring->sg);
    ring->sg = g_new0(struct iovec, 2);

    xen_9pfs_out_sg(ring, ring->sg, &niov, pdu->idx);

    *pniov = niov;
    *piov = ring->sg;
}
197
/*
 * Transport hook: hand out an iovec array describing at least @size bytes
 * of the "in" ring for the reply to @pdu.
 *
 * @piov:  receives the array, which is stored in ring->sg
 * @pniov: receives the number of valid entries
 *
 * While the ring does not offer @size bytes the calling coroutine yields;
 * it is recorded in ring->co so that xen_9pfs_bh() can re-enter it.
 */
static void xen_9pfs_init_in_iov_from_pdu(V9fsPDU *pdu,
                                          struct iovec **piov,
                                          unsigned int *pniov,
                                          size_t size)
{
    Xen9pfsDev *dev = container_of(pdu->s, Xen9pfsDev, state);
    Xen9pfsRing *ring = &dev->rings[pdu->tag % dev->num_rings];
    int niov;

    g_free(ring->sg);
    ring->sg = g_new0(struct iovec, 2);

    ring->co = qemu_coroutine_self();
    /* make sure other threads see ring->co changes before continuing */
    smp_wmb();

    for (;;) {
        size_t avail;

        xen_9pfs_in_sg(ring, ring->sg, &niov, pdu->idx, size);
        avail = iov_size(ring->sg, niov);
        if (avail >= size) {
            break;
        }
        /* Not enough room yet: wait until we are entered again. */
        qemu_coroutine_yield();
    }

    ring->co = NULL;
    /* make sure other threads see ring->co changes before continuing */
    smp_wmb();

    *pniov = niov;
    *piov = ring->sg;
}
229
/*
 * Transport hook: finish the request identified by @pdu.
 *
 * Frees the ring's scatter-gather list, publishes the updated "out"
 * consumer and "in" producer indexes in the shared interface, clears the
 * in-progress flag, notifies the frontend over the event channel and
 * reschedules the ring's bottom half.
 */
static void xen_9pfs_push_and_notify(V9fsPDU *pdu)
{
    RING_IDX prod;
    Xen9pfsDev *priv = container_of(pdu->s, Xen9pfsDev, state);
    Xen9pfsRing *ring = &priv->rings[pdu->tag % priv->num_rings];

    g_free(ring->sg);
    ring->sg = NULL;

    /* Publish the consumer index computed in xen_9pfs_receive(). */
    ring->intf->out_cons = ring->out_cons;
    xen_wmb();

    /* Advance the "in" producer index by pdu->size. */
    prod = ring->intf->in_prod;
    xen_rmb();
    ring->intf->in_prod = prod + pdu->size;
    xen_wmb();

    /* Let xen_9pfs_receive() accept the next request on this ring. */
    ring->inprogress = false;
    qemu_xen_evtchn_notify(ring->evtchndev, ring->local_port);

    qemu_bh_schedule(ring->bh);
}
252
/* Transport callbacks passed to v9fs_device_realize_common() on connect. */
static const V9fsTransport xen_9p_transport = {
    .pdu_vmarshal = xen_9pfs_pdu_vmarshal,
    .pdu_vunmarshal = xen_9pfs_pdu_vunmarshal,
    .init_in_iov_from_pdu = xen_9pfs_init_in_iov_from_pdu,
    .init_out_iov_from_pdu = xen_9pfs_init_out_iov_from_pdu,
    .push_and_notify = xen_9pfs_push_and_notify,
};
260
/* XenDevOps .init hook: nothing to do here, always returns 0. */
static int xen_9pfs_init(struct XenLegacyDevice *xendev)
{
    return 0;
}
265
/*
 * Try to pick up the next request from the "out" ring of @ring and submit
 * it to the 9p core.
 *
 * Nothing happens while a previous request of this ring is still in
 * progress, or while the ring does not yet hold a complete request
 * (header plus the number of bytes announced in the header).
 *
 * Always returns 0.
 */
static int xen_9pfs_receive(Xen9pfsRing *ring)
{
    P9MsgHeader h;
    RING_IDX cons, prod, masked_prod, masked_cons, queued;
    uint32_t size;
    V9fsPDU *pdu;

    if (ring->inprogress) {
        return 0;
    }

    cons = ring->intf->out_cons;
    prod = ring->intf->out_prod;
    xen_rmb();

    queued = xen_9pfs_queued(prod, cons, XEN_FLEX_RING_SIZE(ring->ring_order));
    if (queued < sizeof(h)) {
        return 0;
    }

    masked_prod = xen_9pfs_mask(prod, XEN_FLEX_RING_SIZE(ring->ring_order));
    masked_cons = xen_9pfs_mask(cons, XEN_FLEX_RING_SIZE(ring->ring_order));

    xen_9pfs_read_packet((uint8_t *) &h, ring->ring.out, sizeof(h),
                         masked_prod, &masked_cons,
                         XEN_FLEX_RING_SIZE(ring->ring_order));
    size = le32_to_cpu(h.size_le);
    if (queued < size) {
        /*
         * The request is not complete yet. The ring must not be marked
         * busy here: only xen_9pfs_push_and_notify() clears the flag, and
         * it never runs for a request that was not submitted, so the ring
         * would never be serviced again.
         */
        return 0;
    }

    /* Claim the ring only once a request is really going to be submitted. */
    ring->inprogress = true;

    /* cannot fail, because we only handle one request per ring at a time */
    pdu = pdu_alloc(&ring->priv->state);
    ring->out_size = size;
    ring->out_cons = cons + size;

    pdu_submit(pdu, &h);

    return 0;
}
305
xen_9pfs_bh(void * opaque)306 static void xen_9pfs_bh(void *opaque)
307 {
308 Xen9pfsRing *ring = opaque;
309 bool wait;
310
311 again:
312 wait = ring->co != NULL && qemu_coroutine_entered(ring->co);
313 /* paired with the smb_wmb barriers in xen_9pfs_init_in_iov_from_pdu */
314 smp_rmb();
315 if (wait) {
316 cpu_relax();
317 goto again;
318 }
319
320 if (ring->co != NULL) {
321 qemu_coroutine_enter_if_inactive(ring->co);
322 }
323 xen_9pfs_receive(ring);
324 }
325
xen_9pfs_evtchn_event(void * opaque)326 static void xen_9pfs_evtchn_event(void *opaque)
327 {
328 Xen9pfsRing *ring = opaque;
329 evtchn_port_t port;
330
331 port = qemu_xen_evtchn_pending(ring->evtchndev);
332 qemu_xen_evtchn_unmask(ring->evtchndev, port);
333
334 qemu_bh_schedule(ring->bh);
335 }
336
xen_9pfs_disconnect(struct XenLegacyDevice * xendev)337 static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev)
338 {
339 Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
340 int i;
341
342 trace_xen_9pfs_disconnect(xendev->name);
343
344 for (i = 0; i < xen_9pdev->num_rings; i++) {
345 if (xen_9pdev->rings[i].evtchndev != NULL) {
346 qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev),
347 NULL, NULL, NULL);
348 qemu_xen_evtchn_unbind(xen_9pdev->rings[i].evtchndev,
349 xen_9pdev->rings[i].local_port);
350 xen_9pdev->rings[i].evtchndev = NULL;
351 }
352 if (xen_9pdev->rings[i].data != NULL) {
353 xen_be_unmap_grant_refs(&xen_9pdev->xendev,
354 xen_9pdev->rings[i].data,
355 xen_9pdev->rings[i].intf->ref,
356 (1 << xen_9pdev->rings[i].ring_order));
357 xen_9pdev->rings[i].data = NULL;
358 }
359 if (xen_9pdev->rings[i].intf != NULL) {
360 xen_be_unmap_grant_ref(&xen_9pdev->xendev,
361 xen_9pdev->rings[i].intf,
362 xen_9pdev->rings[i].ref);
363 xen_9pdev->rings[i].intf = NULL;
364 }
365 if (xen_9pdev->rings[i].bh != NULL) {
366 qemu_bh_delete(xen_9pdev->rings[i].bh);
367 xen_9pdev->rings[i].bh = NULL;
368 }
369 }
370
371 g_free(xen_9pdev->id);
372 xen_9pdev->id = NULL;
373 g_free(xen_9pdev->tag);
374 xen_9pdev->tag = NULL;
375 g_free(xen_9pdev->path);
376 xen_9pdev->path = NULL;
377 g_free(xen_9pdev->security_model);
378 xen_9pdev->security_model = NULL;
379 g_free(xen_9pdev->rings);
380 xen_9pdev->rings = NULL;
381 }
382
/*
 * XenDevOps .free hook: only emits a trace event and returns 0; it releases
 * no resources itself.
 */
static int xen_9pfs_free(struct XenLegacyDevice *xendev)
{
    trace_xen_9pfs_free(xendev->name);

    return 0;
}
389
xen_9pfs_connect(struct XenLegacyDevice * xendev)390 static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
391 {
392 Error *err = NULL;
393 int i;
394 Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
395 V9fsState *s = &xen_9pdev->state;
396 QemuOpts *fsdev;
397
398 trace_xen_9pfs_connect(xendev->name);
399
400 if (xenstore_read_fe_int(&xen_9pdev->xendev, "num-rings",
401 &xen_9pdev->num_rings) == -1 ||
402 xen_9pdev->num_rings > MAX_RINGS || xen_9pdev->num_rings < 1) {
403 return -1;
404 }
405
406 xen_9pdev->rings = g_new0(Xen9pfsRing, xen_9pdev->num_rings);
407 for (i = 0; i < xen_9pdev->num_rings; i++) {
408 char *str;
409 int ring_order;
410
411 xen_9pdev->rings[i].priv = xen_9pdev;
412 xen_9pdev->rings[i].evtchn = -1;
413 xen_9pdev->rings[i].local_port = -1;
414
415 str = g_strdup_printf("ring-ref%u", i);
416 if (xenstore_read_fe_int(&xen_9pdev->xendev, str,
417 &xen_9pdev->rings[i].ref) == -1) {
418 g_free(str);
419 goto out;
420 }
421 g_free(str);
422 str = g_strdup_printf("event-channel-%u", i);
423 if (xenstore_read_fe_int(&xen_9pdev->xendev, str,
424 &xen_9pdev->rings[i].evtchn) == -1) {
425 g_free(str);
426 goto out;
427 }
428 g_free(str);
429
430 xen_9pdev->rings[i].intf =
431 xen_be_map_grant_ref(&xen_9pdev->xendev,
432 xen_9pdev->rings[i].ref,
433 PROT_READ | PROT_WRITE);
434 if (!xen_9pdev->rings[i].intf) {
435 goto out;
436 }
437 ring_order = xen_9pdev->rings[i].intf->ring_order;
438 if (ring_order > MAX_RING_ORDER) {
439 goto out;
440 }
441 xen_9pdev->rings[i].ring_order = ring_order;
442 xen_9pdev->rings[i].data =
443 xen_be_map_grant_refs(&xen_9pdev->xendev,
444 xen_9pdev->rings[i].intf->ref,
445 (1 << ring_order),
446 PROT_READ | PROT_WRITE);
447 if (!xen_9pdev->rings[i].data) {
448 goto out;
449 }
450 xen_9pdev->rings[i].ring.in = xen_9pdev->rings[i].data;
451 xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data +
452 XEN_FLEX_RING_SIZE(ring_order);
453
454 xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh,
455 &xen_9pdev->rings[i],
456 &xen_9pdev->mem_reentrancy_guard);
457 xen_9pdev->rings[i].out_cons = 0;
458 xen_9pdev->rings[i].out_size = 0;
459 xen_9pdev->rings[i].inprogress = false;
460
461
462 xen_9pdev->rings[i].evtchndev = qemu_xen_evtchn_open();
463 if (xen_9pdev->rings[i].evtchndev == NULL) {
464 goto out;
465 }
466 qemu_set_cloexec(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev));
467 xen_9pdev->rings[i].local_port = qemu_xen_evtchn_bind_interdomain
468 (xen_9pdev->rings[i].evtchndev,
469 xendev->dom,
470 xen_9pdev->rings[i].evtchn);
471 if (xen_9pdev->rings[i].local_port == -1) {
472 xen_pv_printf(xendev, 0,
473 "xenevtchn_bind_interdomain failed port=%d\n",
474 xen_9pdev->rings[i].evtchn);
475 goto out;
476 }
477 xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
478 qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev),
479 xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
480 }
481
482 xen_9pdev->security_model = xenstore_read_be_str(xendev, "security_model");
483 xen_9pdev->path = xenstore_read_be_str(xendev, "path");
484 xen_9pdev->id = s->fsconf.fsdev_id =
485 g_strdup_printf("xen9p%d", xendev->dev);
486 xen_9pdev->tag = s->fsconf.tag = xenstore_read_fe_str(xendev, "tag");
487 fsdev = qemu_opts_create(qemu_find_opts("fsdev"),
488 s->fsconf.tag,
489 1, NULL);
490 qemu_opt_set(fsdev, "fsdriver", "local", NULL);
491 qemu_opt_set(fsdev, "path", xen_9pdev->path, NULL);
492 qemu_opt_set(fsdev, "security_model", xen_9pdev->security_model, NULL);
493 qemu_opts_set_id(fsdev, s->fsconf.fsdev_id);
494 qemu_fsdev_add(fsdev, &err);
495 if (err) {
496 error_report_err(err);
497 }
498 v9fs_device_realize_common(s, &xen_9p_transport, NULL);
499
500 return 0;
501
502 out:
503 xen_9pfs_free(xendev);
504 return -1;
505 }
506
/*
 * XenDevOps .alloc hook: write the backend's limits ("versions",
 * "max-rings", "max-ring-page-order") to the backend xenstore directory.
 */
static void xen_9pfs_alloc(struct XenLegacyDevice *xendev)
{
    trace_xen_9pfs_alloc(xendev->name);

    xenstore_write_be_str(xendev, "versions", VERSIONS);
    xenstore_write_be_int(xendev, "max-rings", MAX_RINGS);
    xenstore_write_be_int(xendev, "max-ring-page-order", MAX_RING_ORDER);
}
515
/* Backend operations registered for the "9pfs" device type. */
static const struct XenDevOps xen_9pfs_ops = {
    .size = sizeof(Xen9pfsDev),
    .flags = DEVOPS_FLAG_NEED_GNTDEV,
    .alloc = xen_9pfs_alloc,
    .init = xen_9pfs_init,
    .initialise = xen_9pfs_connect,
    .disconnect = xen_9pfs_disconnect,
    .free = xen_9pfs_free,
};
525
/* Register xen_9pfs_ops as the backend for "9pfs" devices. */
static void xen_9pfs_register_backend(void)
{
    xen_be_register("9pfs", &xen_9pfs_ops);
}
/* Hand the registration function to xen_backend_init(). */
xen_backend_init(xen_9pfs_register_backend);
531