xref: /qemu/scsi/qemu-pr-helper.c (revision 5b76dd13)
1 /*
2  * Privileged helper to handle persistent reservation commands for QEMU
3  *
4  * Copyright (C) 2017 Red Hat, Inc. <pbonzini@redhat.com>
5  *
6  * Author: Paolo Bonzini <pbonzini@redhat.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; under version 2 of the License.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include <getopt.h>
23 #include <sys/ioctl.h>
24 #include <linux/dm-ioctl.h>
25 #include <scsi/sg.h>
26 
27 #ifdef CONFIG_LIBCAP
28 #include <cap-ng.h>
29 #endif
30 #include <pwd.h>
31 #include <grp.h>
32 
33 #ifdef CONFIG_MPATH
34 #include <libudev.h>
35 #include <mpath_cmd.h>
36 #include <mpath_persist.h>
37 #endif
38 
39 #include "qapi/error.h"
40 #include "qemu-common.h"
41 #include "qemu/cutils.h"
42 #include "qemu/main-loop.h"
43 #include "qemu/error-report.h"
44 #include "qemu/config-file.h"
45 #include "qemu/bswap.h"
46 #include "qemu/log.h"
47 #include "qemu/systemd.h"
48 #include "qapi/util.h"
49 #include "qapi/qmp/qstring.h"
50 #include "io/channel-socket.h"
51 #include "trace/control.h"
52 #include "qemu-version.h"
53 
54 #include "block/aio.h"
55 #include "block/thread-pool.h"
56 
57 #include "scsi/constants.h"
58 #include "scsi/utils.h"
59 #include "pr-helper.h"
60 
61 #define PR_OUT_FIXED_PARAM_SIZE 24
62 
63 static char *socket_path;
64 static char *pidfile;
65 static enum { RUNNING, TERMINATE, TERMINATING } state;
66 static QIOChannelSocket *server_ioc;
67 static int server_watch;
68 static int num_active_sockets = 1;
69 static int noisy;
70 static int verbose;
71 
72 #ifdef CONFIG_LIBCAP
73 static int uid = -1;
74 static int gid = -1;
75 #endif
76 
77 static void compute_default_paths(void)
78 {
79     socket_path = qemu_get_local_state_pathname("run/qemu-pr-helper.sock");
80     pidfile = qemu_get_local_state_pathname("run/qemu-pr-helper.pid");
81 }
82 
83 static void usage(const char *name)
84 {
85     (printf) (
86 "Usage: %s [OPTIONS] FILE\n"
87 "Persistent Reservation helper program for QEMU\n"
88 "\n"
89 "  -h, --help                display this help and exit\n"
90 "  -V, --version             output version information and exit\n"
91 "\n"
92 "  -d, --daemon              run in the background\n"
93 "  -f, --pidfile=PATH        PID file when running as a daemon\n"
94 "                            (default '%s')\n"
95 "  -k, --socket=PATH         path to the unix socket\n"
96 "                            (default '%s')\n"
97 "  -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
98 "                            specify tracing options\n"
99 #ifdef CONFIG_LIBCAP
100 "  -u, --user=USER           user to drop privileges to\n"
101 "  -g, --group=GROUP         group to drop privileges to\n"
102 #endif
103 "\n"
104 QEMU_HELP_BOTTOM "\n"
105     , name, pidfile, socket_path);
106 }
107 
108 static void version(const char *name)
109 {
110     printf(
111 "%s " QEMU_FULL_VERSION "\n"
112 "Written by Paolo Bonzini.\n"
113 "\n"
114 QEMU_COPYRIGHT "\n"
115 "This is free software; see the source for copying conditions.  There is NO\n"
116 "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
117     , name);
118 }
119 
120 /* SG_IO support */
121 
/* Arguments and results of one SG_IO request run by do_sgio_worker(). */
typedef struct PRHelperSGIOData {
    /* Descriptor the SG_IO ioctl is issued on. */
    int fd;
    /* Command block; PR_HELPER_CDB_SIZE bytes are passed to the kernel. */
    const uint8_t *cdb;
    /* Sense buffer of PR_HELPER_SENSE_SIZE bytes, cleared before use. */
    uint8_t *sense;
    /* Data buffer for the transfer. */
    uint8_t *buf;
    /* In: size of the transfer.  Out: bytes actually transferred, or zero
     * if the command did not complete with GOOD status.
     */
    int sz;
    /* Transfer direction, stored in sg_io_hdr.dxfer_direction. */
    int dir;
} PRHelperSGIOData;
130 
131 static int do_sgio_worker(void *opaque)
132 {
133     PRHelperSGIOData *data = opaque;
134     struct sg_io_hdr io_hdr;
135     int ret;
136     int status;
137     SCSISense sense_code;
138 
139     memset(data->sense, 0, PR_HELPER_SENSE_SIZE);
140     memset(&io_hdr, 0, sizeof(io_hdr));
141     io_hdr.interface_id = 'S';
142     io_hdr.cmd_len = PR_HELPER_CDB_SIZE;
143     io_hdr.cmdp = (uint8_t *)data->cdb;
144     io_hdr.sbp = data->sense;
145     io_hdr.mx_sb_len = PR_HELPER_SENSE_SIZE;
146     io_hdr.timeout = 1;
147     io_hdr.dxfer_direction = data->dir;
148     io_hdr.dxferp = (char *)data->buf;
149     io_hdr.dxfer_len = data->sz;
150     ret = ioctl(data->fd, SG_IO, &io_hdr);
151     status = sg_io_sense_from_errno(ret < 0 ? errno : 0, &io_hdr,
152                                     &sense_code);
153     if (status == GOOD) {
154         data->sz -= io_hdr.resid;
155     } else {
156         data->sz = 0;
157     }
158 
159     if (status == CHECK_CONDITION &&
160         !(io_hdr.driver_status & SG_ERR_DRIVER_SENSE)) {
161         scsi_build_sense(data->sense, sense_code);
162     }
163 
164     return status;
165 }
166 
167 static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
168                     uint8_t *buf, int *sz, int dir)
169 {
170     ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
171     int r;
172 
173     PRHelperSGIOData data = {
174         .fd = fd,
175         .cdb = cdb,
176         .sense = sense,
177         .buf = buf,
178         .sz = *sz,
179         .dir = dir,
180     };
181 
182     r = thread_pool_submit_co(pool, do_sgio_worker, &data);
183     *sz = data.sz;
184     return r;
185 }
186 
187 /* Device mapper interface */
188 
189 #ifdef CONFIG_MPATH
190 #define CONTROL_PATH "/dev/mapper/control"
191 
192 typedef struct DMData {
193     struct dm_ioctl dm;
194     uint8_t data[1024];
195 } DMData;
196 
197 static int control_fd;
198 
199 static void *dm_ioctl(int ioc, struct dm_ioctl *dm)
200 {
201     static DMData d;
202     memcpy(&d.dm, dm, sizeof(d.dm));
203     QEMU_BUILD_BUG_ON(sizeof(d.data) < sizeof(struct dm_target_spec));
204 
205     d.dm.version[0] = DM_VERSION_MAJOR;
206     d.dm.version[1] = 0;
207     d.dm.version[2] = 0;
208     d.dm.data_size = 1024;
209     d.dm.data_start = offsetof(DMData, data);
210     if (ioctl(control_fd, ioc, &d) < 0) {
211         return NULL;
212     }
213     memcpy(dm, &d.dm, sizeof(d.dm));
214     return &d.data;
215 }
216 
217 static void *dm_dev_ioctl(int fd, int ioc, struct dm_ioctl *dm)
218 {
219     struct stat st;
220     int r;
221 
222     r = fstat(fd, &st);
223     if (r < 0) {
224         perror("fstat");
225         exit(1);
226     }
227 
228     dm->dev = st.st_rdev;
229     return dm_ioctl(ioc, dm);
230 }
231 
232 static void dm_init(void)
233 {
234     control_fd = open(CONTROL_PATH, O_RDWR);
235     if (control_fd < 0) {
236         perror("Cannot open " CONTROL_PATH);
237         exit(1);
238     }
239     struct dm_ioctl dm = { };
240     if (!dm_ioctl(DM_VERSION, &dm)) {
241         perror("ioctl");
242         exit(1);
243     }
244     if (dm.version[0] != DM_VERSION_MAJOR) {
245         fprintf(stderr, "Unsupported device mapper interface");
246         exit(1);
247     }
248 }
249 
250 /* Variables required by libmultipath and libmpathpersist.  */
251 QEMU_BUILD_BUG_ON(PR_HELPER_DATA_SIZE > MPATH_MAX_PARAM_LEN);
252 static struct config *multipath_conf;
253 unsigned mpath_mx_alloc_len = PR_HELPER_DATA_SIZE;
254 int logsink;
255 struct udev *udev;
256 
257 extern struct config *get_multipath_config(void);
258 struct config *get_multipath_config(void)
259 {
260     return multipath_conf;
261 }
262 
263 extern void put_multipath_config(struct config *conf);
264 void put_multipath_config(struct config *conf)
265 {
266 }
267 
268 static void multipath_pr_init(void)
269 {
270     udev = udev_new();
271 #ifdef CONFIG_MPATH_NEW_API
272     multipath_conf = mpath_lib_init();
273 #else
274     mpath_lib_init(udev);
275 #endif
276 }
277 
278 static int is_mpath(int fd)
279 {
280     struct dm_ioctl dm = { .flags = DM_NOFLUSH_FLAG };
281     struct dm_target_spec *tgt;
282 
283     tgt = dm_dev_ioctl(fd, DM_TABLE_STATUS, &dm);
284     if (!tgt) {
285         if (errno == ENXIO) {
286             return 0;
287         }
288         perror("ioctl");
289         exit(EXIT_FAILURE);
290     }
291     return !strncmp(tgt->target_type, "multipath", DM_MAX_TYPE_NAME);
292 }
293 
294 static SCSISense mpath_generic_sense(int r)
295 {
296     switch (r) {
297     case MPATH_PR_SENSE_NOT_READY:
298          return SENSE_CODE(NOT_READY);
299     case MPATH_PR_SENSE_MEDIUM_ERROR:
300          return SENSE_CODE(READ_ERROR);
301     case MPATH_PR_SENSE_HARDWARE_ERROR:
302          return SENSE_CODE(TARGET_FAILURE);
303     case MPATH_PR_SENSE_ABORTED_COMMAND:
304          return SENSE_CODE(IO_ERROR);
305     default:
306          abort();
307     }
308 }
309 
310 static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
311 {
312     switch (r) {
313     case MPATH_PR_SUCCESS:
314         return GOOD;
315     case MPATH_PR_SENSE_NOT_READY:
316     case MPATH_PR_SENSE_MEDIUM_ERROR:
317     case MPATH_PR_SENSE_HARDWARE_ERROR:
318     case MPATH_PR_SENSE_ABORTED_COMMAND:
319         {
320             /* libmpathpersist ate the exact sense.  Try to find it by
321              * issuing TEST UNIT READY.
322              */
323             uint8_t cdb[6] = { TEST_UNIT_READY };
324             int sz = 0;
325             int r = do_sgio(fd, cdb, sense, NULL, &sz, SG_DXFER_NONE);
326 
327             if (r != GOOD) {
328                 return r;
329             }
330             scsi_build_sense(sense, mpath_generic_sense(r));
331             return CHECK_CONDITION;
332         }
333 
334     case MPATH_PR_SENSE_UNIT_ATTENTION:
335         /* Congratulations libmpathpersist, you ruined the Unit Attention...
336          * Return a heavyweight one.
337          */
338         scsi_build_sense(sense, SENSE_CODE(SCSI_BUS_RESET));
339         return CHECK_CONDITION;
340     case MPATH_PR_SENSE_INVALID_OP:
341         /* Only one valid sense.  */
342         scsi_build_sense(sense, SENSE_CODE(INVALID_OPCODE));
343         return CHECK_CONDITION;
344     case MPATH_PR_ILLEGAL_REQ:
345         /* Guess.  */
346         scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM));
347         return CHECK_CONDITION;
348     case MPATH_PR_NO_SENSE:
349         scsi_build_sense(sense, SENSE_CODE(NO_SENSE));
350         return CHECK_CONDITION;
351 
352     case MPATH_PR_RESERV_CONFLICT:
353         return RESERVATION_CONFLICT;
354 
355     case MPATH_PR_OTHER:
356     default:
357         scsi_build_sense(sense, SENSE_CODE(LUN_COMM_FAILURE));
358         return CHECK_CONDITION;
359     }
360 }
361 
362 static int multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
363                            uint8_t *data, int sz)
364 {
365     int rq_servact = cdb[1];
366     struct prin_resp resp;
367     size_t written;
368     int r;
369 
370     switch (rq_servact) {
371     case MPATH_PRIN_RKEY_SA:
372     case MPATH_PRIN_RRES_SA:
373     case MPATH_PRIN_RCAP_SA:
374         break;
375     case MPATH_PRIN_RFSTAT_SA:
376         /* Nobody implements it anyway, so bail out. */
377     default:
378         /* Cannot parse any other output.  */
379         scsi_build_sense(sense, SENSE_CODE(INVALID_FIELD));
380         return CHECK_CONDITION;
381     }
382 
383     r = mpath_persistent_reserve_in(fd, rq_servact, &resp, noisy, verbose);
384     if (r == MPATH_PR_SUCCESS) {
385         switch (rq_servact) {
386         case MPATH_PRIN_RKEY_SA:
387         case MPATH_PRIN_RRES_SA: {
388             struct prin_readdescr *out = &resp.prin_descriptor.prin_readkeys;
389             assert(sz >= 8);
390             written = MIN(out->additional_length + 8, sz);
391             stl_be_p(&data[0], out->prgeneration);
392             stl_be_p(&data[4], out->additional_length);
393             memcpy(&data[8], out->key_list, written - 8);
394             break;
395         }
396         case MPATH_PRIN_RCAP_SA: {
397             struct prin_capdescr *out = &resp.prin_descriptor.prin_readcap;
398             assert(sz >= 6);
399             written = 6;
400             stw_be_p(&data[0], out->length);
401             data[2] = out->flags[0];
402             data[3] = out->flags[1];
403             stw_be_p(&data[4], out->pr_type_mask);
404             break;
405         }
406         default:
407             scsi_build_sense(sense, SENSE_CODE(INVALID_OPCODE));
408             return CHECK_CONDITION;
409         }
410         assert(written <= sz);
411         memset(data + written, 0, sz - written);
412     }
413 
414     return mpath_reconstruct_sense(fd, r, sense);
415 }
416 
417 static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
418                             const uint8_t *param, int sz)
419 {
420     int rq_servact = cdb[1];
421     int rq_scope = cdb[2] >> 4;
422     int rq_type = cdb[2] & 0xf;
423     struct prout_param_descriptor paramp;
424     char transportids[PR_HELPER_DATA_SIZE];
425     int r;
426 
427     if (sz < PR_OUT_FIXED_PARAM_SIZE) {
428         /* Illegal request, Parameter list length error.  This isn't fatal;
429          * we have read the data, send an error without closing the socket.
430          */
431         scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM_LEN));
432         return CHECK_CONDITION;
433     }
434 
435     switch (rq_servact) {
436     case MPATH_PROUT_REG_SA:
437     case MPATH_PROUT_RES_SA:
438     case MPATH_PROUT_REL_SA:
439     case MPATH_PROUT_CLEAR_SA:
440     case MPATH_PROUT_PREE_SA:
441     case MPATH_PROUT_PREE_AB_SA:
442     case MPATH_PROUT_REG_IGN_SA:
443         break;
444     case MPATH_PROUT_REG_MOV_SA:
445         /* Not supported by struct prout_param_descriptor.  */
446     default:
447         /* Cannot parse any other input.  */
448         scsi_build_sense(sense, SENSE_CODE(INVALID_FIELD));
449         return CHECK_CONDITION;
450     }
451 
452     /* Convert input data, especially transport IDs, to the structs
453      * used by libmpathpersist (which, of course, will immediately
454      * do the opposite).
455      */
456     memset(&paramp, 0, sizeof(paramp));
457     memcpy(&paramp.key, &param[0], 8);
458     memcpy(&paramp.sa_key, &param[8], 8);
459     paramp.sa_flags = param[20];
460     if (sz > PR_OUT_FIXED_PARAM_SIZE) {
461         size_t transportid_len;
462         int i, j;
463         if (sz < PR_OUT_FIXED_PARAM_SIZE + 4) {
464             scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM_LEN));
465             return CHECK_CONDITION;
466         }
467         transportid_len = ldl_be_p(&param[24]) + PR_OUT_FIXED_PARAM_SIZE + 4;
468         if (transportid_len > sz) {
469             scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM));
470             return CHECK_CONDITION;
471         }
472         for (i = PR_OUT_FIXED_PARAM_SIZE + 4, j = 0; i < transportid_len; ) {
473             struct transportid *id = (struct transportid *) &transportids[j];
474             int len;
475 
476             id->format_code = param[i] & 0xc0;
477             id->protocol_id = param[i] & 0x0f;
478             switch (param[i] & 0xcf) {
479             case 0:
480                 /* FC transport.  */
481                 if (i + 24 > transportid_len) {
482                     goto illegal_req;
483                 }
484                 memcpy(id->n_port_name, &param[i + 8], 8);
485                 j += offsetof(struct transportid, n_port_name[8]);
486                 i += 24;
487                 break;
488             case 5:
489             case 0x45:
490                 /* iSCSI transport.  */
491                 len = lduw_be_p(&param[i + 2]);
492                 if (len > 252 || (len & 3) || i + len + 4 > transportid_len) {
493                     /* For format code 00, the standard says the maximum is 223
494                      * plus the NUL terminator.  For format code 01 there is no
495                      * maximum length, but libmpathpersist ignores the first
496                      * byte of id->iscsi_name so our maximum is 252.
497                      */
498                     goto illegal_req;
499                 }
500                 if (memchr(&param[i + 4], 0, len) == NULL) {
501                     goto illegal_req;
502                 }
503                 memcpy(id->iscsi_name, &param[i + 2], len + 2);
504                 j += offsetof(struct transportid, iscsi_name[len + 2]);
505                 i += len + 4;
506                 break;
507             case 6:
508                 /* SAS transport.  */
509                 if (i + 24 > transportid_len) {
510                     goto illegal_req;
511                 }
512                 memcpy(id->sas_address, &param[i + 4], 8);
513                 j += offsetof(struct transportid, sas_address[8]);
514                 i += 24;
515                 break;
516             default:
517             illegal_req:
518                 scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM));
519                 return CHECK_CONDITION;
520             }
521 
522             paramp.trnptid_list[paramp.num_transportid++] = id;
523         }
524     }
525 
526     r = mpath_persistent_reserve_out(fd, rq_servact, rq_scope, rq_type,
527                                      &paramp, noisy, verbose);
528     return mpath_reconstruct_sense(fd, r, sense);
529 }
530 #endif
531 
532 static int do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
533                     uint8_t *data, int *resp_sz)
534 {
535 #ifdef CONFIG_MPATH
536     if (is_mpath(fd)) {
537         /* multipath_pr_in fills the whole input buffer.  */
538         int r = multipath_pr_in(fd, cdb, sense, data, *resp_sz);
539         if (r != GOOD) {
540             *resp_sz = 0;
541         }
542         return r;
543     }
544 #endif
545 
546     return do_sgio(fd, cdb, sense, data, resp_sz,
547                    SG_DXFER_FROM_DEV);
548 }
549 
550 static int do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
551                      const uint8_t *param, int sz)
552 {
553     int resp_sz;
554 
555     if ((fcntl(fd, F_GETFL) & O_ACCMODE) == O_RDONLY) {
556         scsi_build_sense(sense, SENSE_CODE(INVALID_OPCODE));
557         return CHECK_CONDITION;
558     }
559 
560 #ifdef CONFIG_MPATH
561     if (is_mpath(fd)) {
562         return multipath_pr_out(fd, cdb, sense, param, sz);
563     }
564 #endif
565 
566     resp_sz = sz;
567     return do_sgio(fd, cdb, sense, (uint8_t *)param, &resp_sz,
568                    SG_DXFER_TO_DEV);
569 }
570 
571 /* Client */
572 
573 typedef struct PRHelperClient {
574     QIOChannelSocket *ioc;
575     Coroutine *co;
576     int fd;
577     uint8_t data[PR_HELPER_DATA_SIZE];
578 } PRHelperClient;
579 
580 typedef struct PRHelperRequest {
581     int fd;
582     size_t sz;
583     uint8_t cdb[PR_HELPER_CDB_SIZE];
584 } PRHelperRequest;
585 
586 static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
587                                  Error **errp)
588 {
589     int ret = 0;
590 
591     while (sz > 0) {
592         int *fds = NULL;
593         size_t nfds = 0;
594         int i;
595         struct iovec iov;
596         ssize_t n_read;
597 
598         iov.iov_base = buf;
599         iov.iov_len = sz;
600         n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
601                                         &fds, &nfds, errp);
602 
603         if (n_read == QIO_CHANNEL_ERR_BLOCK) {
604             qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
605             continue;
606         }
607         if (n_read <= 0) {
608             ret = n_read ? n_read : -1;
609             goto err;
610         }
611 
612         /* Stash one file descriptor per request.  */
613         if (nfds) {
614             bool too_many = false;
615             for (i = 0; i < nfds; i++) {
616                 if (client->fd == -1) {
617                     client->fd = fds[i];
618                 } else {
619                     close(fds[i]);
620                     too_many = true;
621                 }
622             }
623             g_free(fds);
624             if (too_many) {
625                 ret = -1;
626                 goto err;
627             }
628         }
629 
630         buf += n_read;
631         sz -= n_read;
632     }
633 
634     return 0;
635 
636 err:
637     if (client->fd != -1) {
638         close(client->fd);
639         client->fd = -1;
640     }
641     return ret;
642 }
643 
644 static int coroutine_fn prh_read_request(PRHelperClient *client,
645                                          PRHelperRequest *req,
646                                          PRHelperResponse *resp, Error **errp)
647 {
648     uint32_t sz;
649 
650     if (prh_read(client, req->cdb, sizeof(req->cdb), NULL) < 0) {
651         return -1;
652     }
653 
654     if (client->fd == -1) {
655         error_setg(errp, "No file descriptor in request.");
656         return -1;
657     }
658 
659     if (req->cdb[0] != PERSISTENT_RESERVE_OUT &&
660         req->cdb[0] != PERSISTENT_RESERVE_IN) {
661         error_setg(errp, "Invalid CDB, closing socket.");
662         goto out_close;
663     }
664 
665     sz = scsi_cdb_xfer(req->cdb);
666     if (sz > sizeof(client->data)) {
667         goto out_close;
668     }
669 
670     if (req->cdb[0] == PERSISTENT_RESERVE_OUT) {
671         if (qio_channel_read_all(QIO_CHANNEL(client->ioc),
672                                  (char *)client->data, sz,
673                                  errp) < 0) {
674             goto out_close;
675         }
676     }
677 
678     req->fd = client->fd;
679     req->sz = sz;
680     client->fd = -1;
681     return sz;
682 
683 out_close:
684     close(client->fd);
685     client->fd = -1;
686     return -1;
687 }
688 
689 static int coroutine_fn prh_write_response(PRHelperClient *client,
690                                            PRHelperRequest *req,
691                                            PRHelperResponse *resp, Error **errp)
692 {
693     ssize_t r;
694     size_t sz;
695 
696     if (req->cdb[0] == PERSISTENT_RESERVE_IN && resp->result == GOOD) {
697         assert(resp->sz <= req->sz && resp->sz <= sizeof(client->data));
698     } else {
699         assert(resp->sz == 0);
700     }
701 
702     sz = resp->sz;
703 
704     resp->result = cpu_to_be32(resp->result);
705     resp->sz = cpu_to_be32(resp->sz);
706     r = qio_channel_write_all(QIO_CHANNEL(client->ioc),
707                               (char *) resp, sizeof(*resp), errp);
708     if (r < 0) {
709         return r;
710     }
711 
712     r = qio_channel_write_all(QIO_CHANNEL(client->ioc),
713                               (char *) client->data,
714                               sz, errp);
715     return r < 0 ? r : 0;
716 }
717 
718 static void coroutine_fn prh_co_entry(void *opaque)
719 {
720     PRHelperClient *client = opaque;
721     Error *local_err = NULL;
722     uint32_t flags;
723     int r;
724 
725     qio_channel_set_blocking(QIO_CHANNEL(client->ioc),
726                              false, NULL);
727     qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc),
728                                    qemu_get_aio_context());
729 
730     /* A very simple negotiation for future extensibility.  No features
731      * are defined so write 0.
732      */
733     flags = cpu_to_be32(0);
734     r = qio_channel_write_all(QIO_CHANNEL(client->ioc),
735                              (char *) &flags, sizeof(flags), NULL);
736     if (r < 0) {
737         goto out;
738     }
739 
740     r = qio_channel_read_all(QIO_CHANNEL(client->ioc),
741                              (char *) &flags, sizeof(flags), NULL);
742     if (be32_to_cpu(flags) != 0 || r < 0) {
743         goto out;
744     }
745 
746     while (atomic_read(&state) == RUNNING) {
747         PRHelperRequest req;
748         PRHelperResponse resp;
749         int sz;
750 
751         sz = prh_read_request(client, &req, &resp, &local_err);
752         if (sz < 0) {
753             break;
754         }
755 
756         num_active_sockets++;
757         if (req.cdb[0] == PERSISTENT_RESERVE_OUT) {
758             r = do_pr_out(req.fd, req.cdb, resp.sense,
759                           client->data, sz);
760             resp.sz = 0;
761         } else {
762             resp.sz = sizeof(client->data);
763             r = do_pr_in(req.fd, req.cdb, resp.sense,
764                          client->data, &resp.sz);
765             resp.sz = MIN(resp.sz, sz);
766         }
767         num_active_sockets--;
768         close(req.fd);
769         if (r == -1) {
770             break;
771         }
772         resp.result = r;
773 
774         if (prh_write_response(client, &req, &resp, &local_err) < 0) {
775             break;
776         }
777     }
778 
779     if (local_err) {
780         if (verbose == 0) {
781             error_free(local_err);
782         } else {
783             error_report_err(local_err);
784         }
785     }
786 
787 out:
788     qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
789     object_unref(OBJECT(client->ioc));
790     g_free(client);
791 }
792 
793 static gboolean accept_client(QIOChannel *ioc, GIOCondition cond, gpointer opaque)
794 {
795     QIOChannelSocket *cioc;
796     PRHelperClient *prh;
797 
798     cioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc),
799                                      NULL);
800     if (!cioc) {
801         return TRUE;
802     }
803 
804     prh = g_new(PRHelperClient, 1);
805     prh->ioc = cioc;
806     prh->fd = -1;
807     prh->co = qemu_coroutine_create(prh_co_entry, prh);
808     qemu_coroutine_enter(prh->co);
809 
810     return TRUE;
811 }
812 
813 static void termsig_handler(int signum)
814 {
815     atomic_cmpxchg(&state, RUNNING, TERMINATE);
816     qemu_notify_event();
817 }
818 
819 static void close_server_socket(void)
820 {
821     assert(server_ioc);
822 
823     g_source_remove(server_watch);
824     server_watch = -1;
825     object_unref(OBJECT(server_ioc));
826     num_active_sockets--;
827 }
828 
829 #ifdef CONFIG_LIBCAP
830 static int drop_privileges(void)
831 {
832     /* clear all capabilities */
833     capng_clear(CAPNG_SELECT_BOTH);
834 
835     if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED,
836                      CAP_SYS_RAWIO) < 0) {
837         return -1;
838     }
839 
840 #ifdef CONFIG_MPATH
841     /* For /dev/mapper/control ioctls */
842     if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED,
843                      CAP_SYS_ADMIN) < 0) {
844         return -1;
845     }
846 #endif
847 
848     /* Change user/group id, retaining the capabilities.  Because file descriptors
849      * are passed via SCM_RIGHTS, we don't need supplementary groups (and in
850      * fact the helper can run as "nobody").
851      */
852     if (capng_change_id(uid != -1 ? uid : getuid(),
853                         gid != -1 ? gid : getgid(),
854                         CAPNG_DROP_SUPP_GRP | CAPNG_CLEAR_BOUNDING)) {
855         return -1;
856     }
857 
858     return 0;
859 }
860 #endif
861 
862 int main(int argc, char **argv)
863 {
864     const char *sopt = "hVk:f:dT:u:g:vq";
865     struct option lopt[] = {
866         { "help", no_argument, NULL, 'h' },
867         { "version", no_argument, NULL, 'V' },
868         { "socket", required_argument, NULL, 'k' },
869         { "pidfile", required_argument, NULL, 'f' },
870         { "daemon", no_argument, NULL, 'd' },
871         { "trace", required_argument, NULL, 'T' },
872         { "user", required_argument, NULL, 'u' },
873         { "group", required_argument, NULL, 'g' },
874         { "verbose", no_argument, NULL, 'v' },
875         { "quiet", no_argument, NULL, 'q' },
876         { NULL, 0, NULL, 0 }
877     };
878     int opt_ind = 0;
879     int loglevel = 1;
880     int quiet = 0;
881     int ch;
882     Error *local_err = NULL;
883     char *trace_file = NULL;
884     bool daemonize = false;
885     bool pidfile_specified = false;
886     bool socket_path_specified = false;
887     unsigned socket_activation;
888 
889     struct sigaction sa_sigterm;
890     memset(&sa_sigterm, 0, sizeof(sa_sigterm));
891     sa_sigterm.sa_handler = termsig_handler;
892     sigaction(SIGTERM, &sa_sigterm, NULL);
893     sigaction(SIGINT, &sa_sigterm, NULL);
894     sigaction(SIGHUP, &sa_sigterm, NULL);
895 
896     signal(SIGPIPE, SIG_IGN);
897 
898     module_call_init(MODULE_INIT_TRACE);
899     module_call_init(MODULE_INIT_QOM);
900     qemu_add_opts(&qemu_trace_opts);
901     qemu_init_exec_dir(argv[0]);
902 
903     compute_default_paths();
904 
905     while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
906         switch (ch) {
907         case 'k':
908             g_free(socket_path);
909             socket_path = g_strdup(optarg);
910             socket_path_specified = true;
911             if (socket_path[0] != '/') {
912                 error_report("socket path must be absolute");
913                 exit(EXIT_FAILURE);
914             }
915             break;
916         case 'f':
917             g_free(pidfile);
918             pidfile = g_strdup(optarg);
919             pidfile_specified = true;
920             break;
921 #ifdef CONFIG_LIBCAP
922         case 'u': {
923             unsigned long res;
924             struct passwd *userinfo = getpwnam(optarg);
925             if (userinfo) {
926                 uid = userinfo->pw_uid;
927             } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
928                        (uid_t)res == res) {
929                 uid = res;
930             } else {
931                 error_report("invalid user '%s'", optarg);
932                 exit(EXIT_FAILURE);
933             }
934             break;
935         }
936         case 'g': {
937             unsigned long res;
938             struct group *groupinfo = getgrnam(optarg);
939             if (groupinfo) {
940                 gid = groupinfo->gr_gid;
941             } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
942                        (gid_t)res == res) {
943                 gid = res;
944             } else {
945                 error_report("invalid group '%s'", optarg);
946                 exit(EXIT_FAILURE);
947             }
948             break;
949         }
950 #else
951         case 'u':
952         case 'g':
953             error_report("-%c not supported by this %s", ch, argv[0]);
954             exit(1);
955 #endif
956         case 'd':
957             daemonize = true;
958             break;
959         case 'q':
960             quiet = 1;
961             break;
962         case 'v':
963             ++loglevel;
964             break;
965         case 'T':
966             g_free(trace_file);
967             trace_file = trace_opt_parse(optarg);
968             break;
969         case 'V':
970             version(argv[0]);
971             exit(EXIT_SUCCESS);
972             break;
973         case 'h':
974             usage(argv[0]);
975             exit(EXIT_SUCCESS);
976             break;
977         case '?':
978             error_report("Try `%s --help' for more information.", argv[0]);
979             exit(EXIT_FAILURE);
980         }
981     }
982 
983     /* set verbosity */
984     noisy = !quiet && (loglevel >= 3);
985     verbose = quiet ? 0 : MIN(loglevel, 3);
986 
987     if (!trace_init_backends()) {
988         exit(EXIT_FAILURE);
989     }
990     trace_init_file(trace_file);
991     qemu_set_log(LOG_TRACE);
992 
993 #ifdef CONFIG_MPATH
994     dm_init();
995     multipath_pr_init();
996 #endif
997 
998     socket_activation = check_socket_activation();
999     if (socket_activation == 0) {
1000         SocketAddress saddr;
1001         saddr = (SocketAddress){
1002             .type = SOCKET_ADDRESS_TYPE_UNIX,
1003             .u.q_unix.path = socket_path,
1004         };
1005         server_ioc = qio_channel_socket_new();
1006         if (qio_channel_socket_listen_sync(server_ioc, &saddr, &local_err) < 0) {
1007             object_unref(OBJECT(server_ioc));
1008             error_report_err(local_err);
1009             return 1;
1010         }
1011     } else {
1012         /* Using socket activation - check user didn't use -p etc. */
1013         if (socket_path_specified) {
1014             error_report("Unix socket can't be set when using socket activation");
1015             exit(EXIT_FAILURE);
1016         }
1017 
1018         /* Can only listen on a single socket.  */
1019         if (socket_activation > 1) {
1020             error_report("%s does not support socket activation with LISTEN_FDS > 1",
1021                          argv[0]);
1022             exit(EXIT_FAILURE);
1023         }
1024         server_ioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD,
1025                                                &local_err);
1026         if (server_ioc == NULL) {
1027             error_report("Failed to use socket activation: %s",
1028                          error_get_pretty(local_err));
1029             exit(EXIT_FAILURE);
1030         }
1031     }
1032 
1033     if (qemu_init_main_loop(&local_err)) {
1034         error_report_err(local_err);
1035         exit(EXIT_FAILURE);
1036     }
1037 
1038     server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc),
1039                                          G_IO_IN,
1040                                          accept_client,
1041                                          NULL, NULL);
1042 
1043     if (daemonize) {
1044         if (daemon(0, 0) < 0) {
1045             error_report("Failed to daemonize: %s", strerror(errno));
1046             exit(EXIT_FAILURE);
1047         }
1048     }
1049 
1050     if ((daemonize || pidfile_specified) &&
1051         !qemu_write_pidfile(pidfile, &local_err)) {
1052         error_report_err(local_err);
1053         exit(EXIT_FAILURE);
1054     }
1055 
1056 #ifdef CONFIG_LIBCAP
1057     if (drop_privileges() < 0) {
1058         error_report("Failed to drop privileges: %s", strerror(errno));
1059         exit(EXIT_FAILURE);
1060     }
1061 #endif
1062 
1063     state = RUNNING;
1064     do {
1065         main_loop_wait(false);
1066         if (state == TERMINATE) {
1067             state = TERMINATING;
1068             close_server_socket();
1069         }
1070     } while (num_active_sockets > 0);
1071 
1072     exit(EXIT_SUCCESS);
1073 }
1074