xref: /qemu/qga/commands-posix.c (revision 8b7b9c5c)
1 /*
2  * QEMU Guest Agent POSIX-specific command implementations
3  *
4  * Copyright IBM Corp. 2011
5  *
6  * Authors:
7  *  Michael Roth      <mdroth@linux.vnet.ibm.com>
8  *  Michal Privoznik  <mprivozn@redhat.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2 or later.
11  * See the COPYING file in the top-level directory.
12  */
13 
14 #include "qemu/osdep.h"
15 #include <sys/ioctl.h>
16 #include <sys/utsname.h>
17 #include <sys/wait.h>
18 #include <dirent.h>
19 #include "qga-qapi-commands.h"
20 #include "qapi/error.h"
21 #include "qapi/qmp/qerror.h"
22 #include "qemu/host-utils.h"
23 #include "qemu/sockets.h"
24 #include "qemu/base64.h"
25 #include "qemu/cutils.h"
26 #include "commands-common.h"
27 #include "block/nvme.h"
28 #include "cutils.h"
29 
30 #ifdef HAVE_UTMPX
31 #include <utmpx.h>
32 #endif
33 
34 #if defined(__linux__)
35 #include <mntent.h>
36 #include <sys/statvfs.h>
37 #include <linux/nvme_ioctl.h>
38 
39 #ifdef CONFIG_LIBUDEV
40 #include <libudev.h>
41 #endif
42 #endif
43 
44 #ifdef HAVE_GETIFADDRS
45 #include <arpa/inet.h>
46 #include <sys/socket.h>
47 #include <net/if.h>
48 #if defined(__NetBSD__) || defined(__OpenBSD__)
49 #include <net/if_arp.h>
50 #include <netinet/if_ether.h>
51 #else
52 #include <net/ethernet.h>
53 #endif
54 #ifdef CONFIG_SOLARIS
55 #include <sys/sockio.h>
56 #endif
57 #endif
58 
59 static void ga_wait_child(pid_t pid, int *status, Error **errp)
60 {
61     pid_t rpid;
62 
63     *status = 0;
64 
65     rpid = RETRY_ON_EINTR(waitpid(pid, status, 0));
66 
67     if (rpid == -1) {
68         error_setg_errno(errp, errno, "failed to wait for child (pid: %d)",
69                          pid);
70         return;
71     }
72 
73     g_assert(rpid == pid);
74 }
75 
76 void qmp_guest_shutdown(const char *mode, Error **errp)
77 {
78     const char *shutdown_flag;
79     Error *local_err = NULL;
80     pid_t pid;
81     int status;
82 
83 #ifdef CONFIG_SOLARIS
84     const char *powerdown_flag = "-i5";
85     const char *halt_flag = "-i0";
86     const char *reboot_flag = "-i6";
87 #elif defined(CONFIG_BSD)
88     const char *powerdown_flag = "-p";
89     const char *halt_flag = "-h";
90     const char *reboot_flag = "-r";
91 #else
92     const char *powerdown_flag = "-P";
93     const char *halt_flag = "-H";
94     const char *reboot_flag = "-r";
95 #endif
96 
97     slog("guest-shutdown called, mode: %s", mode);
98     if (!mode || strcmp(mode, "powerdown") == 0) {
99         shutdown_flag = powerdown_flag;
100     } else if (strcmp(mode, "halt") == 0) {
101         shutdown_flag = halt_flag;
102     } else if (strcmp(mode, "reboot") == 0) {
103         shutdown_flag = reboot_flag;
104     } else {
105         error_setg(errp,
106                    "mode is invalid (valid values are: halt|powerdown|reboot");
107         return;
108     }
109 
110     pid = fork();
111     if (pid == 0) {
112         /* child, start the shutdown */
113         setsid();
114         reopen_fd_to_null(0);
115         reopen_fd_to_null(1);
116         reopen_fd_to_null(2);
117 
118 #ifdef CONFIG_SOLARIS
119         execl("/sbin/shutdown", "shutdown", shutdown_flag, "-g0", "-y",
120               "hypervisor initiated shutdown", (char *)NULL);
121 #elif defined(CONFIG_BSD)
122         execl("/sbin/shutdown", "shutdown", shutdown_flag, "+0",
123                "hypervisor initiated shutdown", (char *)NULL);
124 #else
125         execl("/sbin/shutdown", "shutdown", "-h", shutdown_flag, "+0",
126                "hypervisor initiated shutdown", (char *)NULL);
127 #endif
128         _exit(EXIT_FAILURE);
129     } else if (pid < 0) {
130         error_setg_errno(errp, errno, "failed to create child process");
131         return;
132     }
133 
134     ga_wait_child(pid, &status, &local_err);
135     if (local_err) {
136         error_propagate(errp, local_err);
137         return;
138     }
139 
140     if (!WIFEXITED(status)) {
141         error_setg(errp, "child process has terminated abnormally");
142         return;
143     }
144 
145     if (WEXITSTATUS(status)) {
146         error_setg(errp, "child process has failed to shutdown");
147         return;
148     }
149 
150     /* succeeded */
151 }
152 
153 void qmp_guest_set_time(bool has_time, int64_t time_ns, Error **errp)
154 {
155     int ret;
156     int status;
157     pid_t pid;
158     Error *local_err = NULL;
159     struct timeval tv;
160     static const char hwclock_path[] = "/sbin/hwclock";
161     static int hwclock_available = -1;
162 
163     if (hwclock_available < 0) {
164         hwclock_available = (access(hwclock_path, X_OK) == 0);
165     }
166 
167     if (!hwclock_available) {
168         error_setg(errp, QERR_UNSUPPORTED);
169         return;
170     }
171 
172     /* If user has passed a time, validate and set it. */
173     if (has_time) {
174         GDate date = { 0, };
175 
176         /* year-2038 will overflow in case time_t is 32bit */
177         if (time_ns / 1000000000 != (time_t)(time_ns / 1000000000)) {
178             error_setg(errp, "Time %" PRId64 " is too large", time_ns);
179             return;
180         }
181 
182         tv.tv_sec = time_ns / 1000000000;
183         tv.tv_usec = (time_ns % 1000000000) / 1000;
184         g_date_set_time_t(&date, tv.tv_sec);
185         if (date.year < 1970 || date.year >= 2070) {
186             error_setg_errno(errp, errno, "Invalid time");
187             return;
188         }
189 
190         ret = settimeofday(&tv, NULL);
191         if (ret < 0) {
192             error_setg_errno(errp, errno, "Failed to set time to guest");
193             return;
194         }
195     }
196 
197     /* Now, if user has passed a time to set and the system time is set, we
198      * just need to synchronize the hardware clock. However, if no time was
199      * passed, user is requesting the opposite: set the system time from the
200      * hardware clock (RTC). */
201     pid = fork();
202     if (pid == 0) {
203         setsid();
204         reopen_fd_to_null(0);
205         reopen_fd_to_null(1);
206         reopen_fd_to_null(2);
207 
208         /* Use '/sbin/hwclock -w' to set RTC from the system time,
209          * or '/sbin/hwclock -s' to set the system time from RTC. */
210         execl(hwclock_path, "hwclock", has_time ? "-w" : "-s", NULL);
211         _exit(EXIT_FAILURE);
212     } else if (pid < 0) {
213         error_setg_errno(errp, errno, "failed to create child process");
214         return;
215     }
216 
217     ga_wait_child(pid, &status, &local_err);
218     if (local_err) {
219         error_propagate(errp, local_err);
220         return;
221     }
222 
223     if (!WIFEXITED(status)) {
224         error_setg(errp, "child process has terminated abnormally");
225         return;
226     }
227 
228     if (WEXITSTATUS(status)) {
229         error_setg(errp, "hwclock failed to set hardware clock to system time");
230         return;
231     }
232 }
233 
/*
 * Direction of the most recent stdio transfer on a guest file handle.
 * The read and write paths consult it to decide whether an fflush() or
 * fseek() is needed before switching direction.
 */
typedef enum {
    RW_STATE_NEW,       /* fresh handle, or just flushed / repositioned */
    RW_STATE_READING,   /* last successful transfer was a read */
    RW_STATE_WRITING,   /* last successful transfer was a write */
} RwState;
239 
240 struct GuestFileHandle {
241     uint64_t id;
242     FILE *fh;
243     RwState state;
244     QTAILQ_ENTRY(GuestFileHandle) next;
245 };
246 
247 static struct {
248     QTAILQ_HEAD(, GuestFileHandle) filehandles;
249 } guest_file_state = {
250     .filehandles = QTAILQ_HEAD_INITIALIZER(guest_file_state.filehandles),
251 };
252 
253 static int64_t guest_file_handle_add(FILE *fh, Error **errp)
254 {
255     GuestFileHandle *gfh;
256     int64_t handle;
257 
258     handle = ga_get_fd_handle(ga_state, errp);
259     if (handle < 0) {
260         return -1;
261     }
262 
263     gfh = g_new0(GuestFileHandle, 1);
264     gfh->id = handle;
265     gfh->fh = fh;
266     QTAILQ_INSERT_TAIL(&guest_file_state.filehandles, gfh, next);
267 
268     return handle;
269 }
270 
271 GuestFileHandle *guest_file_handle_find(int64_t id, Error **errp)
272 {
273     GuestFileHandle *gfh;
274 
275     QTAILQ_FOREACH(gfh, &guest_file_state.filehandles, next)
276     {
277         if (gfh->id == id) {
278             return gfh;
279         }
280     }
281 
282     error_setg(errp, "handle '%" PRId64 "' has not been found", id);
283     return NULL;
284 }
285 
/* Constant pointer to constant characters. */
typedef const char * const ccpc;

/* Let the table below mention O_BINARY even where it is not defined. */
#ifndef O_BINARY
#define O_BINARY 0
#endif

/*
 * fopen() mode strings and the open() flags each one stands for, see
 * http://pubs.opengroup.org/onlinepubs/9699919799/functions/fopen.html
 *
 * Each row lists every accepted spelling of one mode, terminated by NULL.
 */
static const struct {
    ccpc *forms;        /* NULL-terminated list of equivalent spellings */
    int oflag_base;     /* open() flags for those spellings */
} guest_file_open_modes[] = {
    /* read */
    { (ccpc[]){ "r",          NULL }, O_RDONLY                                 },
    { (ccpc[]){ "rb",         NULL }, O_RDONLY                      | O_BINARY },
    /* write */
    { (ccpc[]){ "w",          NULL }, O_WRONLY | O_CREAT | O_TRUNC             },
    { (ccpc[]){ "wb",         NULL }, O_WRONLY | O_CREAT | O_TRUNC  | O_BINARY },
    /* append */
    { (ccpc[]){ "a",          NULL }, O_WRONLY | O_CREAT | O_APPEND            },
    { (ccpc[]){ "ab",         NULL }, O_WRONLY | O_CREAT | O_APPEND | O_BINARY },
    /* update variants */
    { (ccpc[]){ "r+",         NULL }, O_RDWR                                   },
    { (ccpc[]){ "rb+", "r+b", NULL }, O_RDWR                        | O_BINARY },
    { (ccpc[]){ "w+",         NULL }, O_RDWR   | O_CREAT | O_TRUNC             },
    { (ccpc[]){ "wb+", "w+b", NULL }, O_RDWR   | O_CREAT | O_TRUNC  | O_BINARY },
    { (ccpc[]){ "a+",         NULL }, O_RDWR   | O_CREAT | O_APPEND            },
    { (ccpc[]){ "ab+", "a+b", NULL }, O_RDWR   | O_CREAT | O_APPEND | O_BINARY }
};
310 
311 static int
312 find_open_flag(const char *mode_str, Error **errp)
313 {
314     unsigned mode;
315 
316     for (mode = 0; mode < ARRAY_SIZE(guest_file_open_modes); ++mode) {
317         ccpc *form;
318 
319         form = guest_file_open_modes[mode].forms;
320         while (*form != NULL && strcmp(*form, mode_str) != 0) {
321             ++form;
322         }
323         if (*form != NULL) {
324             break;
325         }
326     }
327 
328     if (mode == ARRAY_SIZE(guest_file_open_modes)) {
329         error_setg(errp, "invalid file open mode '%s'", mode_str);
330         return -1;
331     }
332     return guest_file_open_modes[mode].oflag_base | O_NOCTTY | O_NONBLOCK;
333 }
334 
/* Permission bits given to newly created files: rw for user, group, other. */
#define DEFAULT_NEW_FILE_MODE \
    (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)
338 
339 static FILE *
340 safe_open_or_create(const char *path, const char *mode, Error **errp)
341 {
342     int oflag;
343     int fd = -1;
344     FILE *f = NULL;
345 
346     oflag = find_open_flag(mode, errp);
347     if (oflag < 0) {
348         goto end;
349     }
350 
351     /* If the caller wants / allows creation of a new file, we implement it
352      * with a two step process: open() + (open() / fchmod()).
353      *
354      * First we insist on creating the file exclusively as a new file. If
355      * that succeeds, we're free to set any file-mode bits on it. (The
356      * motivation is that we want to set those file-mode bits independently
357      * of the current umask.)
358      *
359      * If the exclusive creation fails because the file already exists
360      * (EEXIST is not possible for any other reason), we just attempt to
361      * open the file, but in this case we won't be allowed to change the
362      * file-mode bits on the preexistent file.
363      *
364      * The pathname should never disappear between the two open()s in
365      * practice. If it happens, then someone very likely tried to race us.
366      * In this case just go ahead and report the ENOENT from the second
367      * open() to the caller.
368      *
369      * If the caller wants to open a preexistent file, then the first
370      * open() is decisive and its third argument is ignored, and the second
371      * open() and the fchmod() are never called.
372      */
373     fd = qga_open_cloexec(path, oflag | ((oflag & O_CREAT) ? O_EXCL : 0), 0);
374     if (fd == -1 && errno == EEXIST) {
375         oflag &= ~(unsigned)O_CREAT;
376         fd = qga_open_cloexec(path, oflag, 0);
377     }
378     if (fd == -1) {
379         error_setg_errno(errp, errno,
380                          "failed to open file '%s' (mode: '%s')",
381                          path, mode);
382         goto end;
383     }
384 
385     if ((oflag & O_CREAT) && fchmod(fd, DEFAULT_NEW_FILE_MODE) == -1) {
386         error_setg_errno(errp, errno, "failed to set permission "
387                          "0%03o on new file '%s' (mode: '%s')",
388                          (unsigned)DEFAULT_NEW_FILE_MODE, path, mode);
389         goto end;
390     }
391 
392     f = fdopen(fd, mode);
393     if (f == NULL) {
394         error_setg_errno(errp, errno, "failed to associate stdio stream with "
395                          "file descriptor %d, file '%s' (mode: '%s')",
396                          fd, path, mode);
397     }
398 
399 end:
400     if (f == NULL && fd != -1) {
401         close(fd);
402         if (oflag & O_CREAT) {
403             unlink(path);
404         }
405     }
406     return f;
407 }
408 
409 int64_t qmp_guest_file_open(const char *path, const char *mode,
410                             Error **errp)
411 {
412     FILE *fh;
413     Error *local_err = NULL;
414     int64_t handle;
415 
416     if (!mode) {
417         mode = "r";
418     }
419     slog("guest-file-open called, filepath: %s, mode: %s", path, mode);
420     fh = safe_open_or_create(path, mode, &local_err);
421     if (local_err != NULL) {
422         error_propagate(errp, local_err);
423         return -1;
424     }
425 
426     /* set fd non-blocking to avoid common use cases (like reading from a
427      * named pipe) from hanging the agent
428      */
429     if (!g_unix_set_fd_nonblocking(fileno(fh), true, NULL)) {
430         fclose(fh);
431         error_setg_errno(errp, errno, "Failed to set FD nonblocking");
432         return -1;
433     }
434 
435     handle = guest_file_handle_add(fh, errp);
436     if (handle < 0) {
437         fclose(fh);
438         return -1;
439     }
440 
441     slog("guest-file-open, handle: %" PRId64, handle);
442     return handle;
443 }
444 
445 void qmp_guest_file_close(int64_t handle, Error **errp)
446 {
447     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
448     int ret;
449 
450     slog("guest-file-close called, handle: %" PRId64, handle);
451     if (!gfh) {
452         return;
453     }
454 
455     ret = fclose(gfh->fh);
456     if (ret == EOF) {
457         error_setg_errno(errp, errno, "failed to close handle");
458         return;
459     }
460 
461     QTAILQ_REMOVE(&guest_file_state.filehandles, gfh, next);
462     g_free(gfh);
463 }
464 
465 GuestFileRead *guest_file_read_unsafe(GuestFileHandle *gfh,
466                                       int64_t count, Error **errp)
467 {
468     GuestFileRead *read_data = NULL;
469     guchar *buf;
470     FILE *fh = gfh->fh;
471     size_t read_count;
472 
473     /* explicitly flush when switching from writing to reading */
474     if (gfh->state == RW_STATE_WRITING) {
475         int ret = fflush(fh);
476         if (ret == EOF) {
477             error_setg_errno(errp, errno, "failed to flush file");
478             return NULL;
479         }
480         gfh->state = RW_STATE_NEW;
481     }
482 
483     buf = g_malloc0(count + 1);
484     read_count = fread(buf, 1, count, fh);
485     if (ferror(fh)) {
486         error_setg_errno(errp, errno, "failed to read file");
487     } else {
488         buf[read_count] = 0;
489         read_data = g_new0(GuestFileRead, 1);
490         read_data->count = read_count;
491         read_data->eof = feof(fh);
492         if (read_count) {
493             read_data->buf_b64 = g_base64_encode(buf, read_count);
494         }
495         gfh->state = RW_STATE_READING;
496     }
497     g_free(buf);
498     clearerr(fh);
499 
500     return read_data;
501 }
502 
503 GuestFileWrite *qmp_guest_file_write(int64_t handle, const char *buf_b64,
504                                      bool has_count, int64_t count,
505                                      Error **errp)
506 {
507     GuestFileWrite *write_data = NULL;
508     guchar *buf;
509     gsize buf_len;
510     int write_count;
511     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
512     FILE *fh;
513 
514     if (!gfh) {
515         return NULL;
516     }
517 
518     fh = gfh->fh;
519 
520     if (gfh->state == RW_STATE_READING) {
521         int ret = fseek(fh, 0, SEEK_CUR);
522         if (ret == -1) {
523             error_setg_errno(errp, errno, "failed to seek file");
524             return NULL;
525         }
526         gfh->state = RW_STATE_NEW;
527     }
528 
529     buf = qbase64_decode(buf_b64, -1, &buf_len, errp);
530     if (!buf) {
531         return NULL;
532     }
533 
534     if (!has_count) {
535         count = buf_len;
536     } else if (count < 0 || count > buf_len) {
537         error_setg(errp, "value '%" PRId64 "' is invalid for argument count",
538                    count);
539         g_free(buf);
540         return NULL;
541     }
542 
543     write_count = fwrite(buf, 1, count, fh);
544     if (ferror(fh)) {
545         error_setg_errno(errp, errno, "failed to write to file");
546         slog("guest-file-write failed, handle: %" PRId64, handle);
547     } else {
548         write_data = g_new0(GuestFileWrite, 1);
549         write_data->count = write_count;
550         write_data->eof = feof(fh);
551         gfh->state = RW_STATE_WRITING;
552     }
553     g_free(buf);
554     clearerr(fh);
555 
556     return write_data;
557 }
558 
559 struct GuestFileSeek *qmp_guest_file_seek(int64_t handle, int64_t offset,
560                                           GuestFileWhence *whence_code,
561                                           Error **errp)
562 {
563     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
564     GuestFileSeek *seek_data = NULL;
565     FILE *fh;
566     int ret;
567     int whence;
568     Error *err = NULL;
569 
570     if (!gfh) {
571         return NULL;
572     }
573 
574     /* We stupidly exposed 'whence':'int' in our qapi */
575     whence = ga_parse_whence(whence_code, &err);
576     if (err) {
577         error_propagate(errp, err);
578         return NULL;
579     }
580 
581     fh = gfh->fh;
582     ret = fseek(fh, offset, whence);
583     if (ret == -1) {
584         error_setg_errno(errp, errno, "failed to seek file");
585         if (errno == ESPIPE) {
586             /* file is non-seekable, stdio shouldn't be buffering anyways */
587             gfh->state = RW_STATE_NEW;
588         }
589     } else {
590         seek_data = g_new0(GuestFileSeek, 1);
591         seek_data->position = ftell(fh);
592         seek_data->eof = feof(fh);
593         gfh->state = RW_STATE_NEW;
594     }
595     clearerr(fh);
596 
597     return seek_data;
598 }
599 
600 void qmp_guest_file_flush(int64_t handle, Error **errp)
601 {
602     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
603     FILE *fh;
604     int ret;
605 
606     if (!gfh) {
607         return;
608     }
609 
610     fh = gfh->fh;
611     ret = fflush(fh);
612     if (ret == EOF) {
613         error_setg_errno(errp, errno, "failed to flush file");
614     } else {
615         gfh->state = RW_STATE_NEW;
616     }
617 }
618 
619 #if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
620 void free_fs_mount_list(FsMountList *mounts)
621 {
622      FsMount *mount, *temp;
623 
624      if (!mounts) {
625          return;
626      }
627 
628      QTAILQ_FOREACH_SAFE(mount, mounts, next, temp) {
629          QTAILQ_REMOVE(mounts, mount, next);
630          g_free(mount->dirname);
631          g_free(mount->devtype);
632          g_free(mount);
633      }
634 }
635 #endif
636 
637 #if defined(CONFIG_FSFREEZE)
/* Which phase the fsfreeze hook script is being run for. */
typedef enum {
    FSFREEZE_HOOK_THAW = 0,
    FSFREEZE_HOOK_FREEZE,
} FsfreezeHookArg;

/* Command-line argument passed to the hook, indexed by FsfreezeHookArg. */
static const char *fsfreeze_hook_arg_string[] = {
    [FSFREEZE_HOOK_THAW]   = "thaw",
    [FSFREEZE_HOOK_FREEZE] = "freeze",
};
647 
648 static void execute_fsfreeze_hook(FsfreezeHookArg arg, Error **errp)
649 {
650     int status;
651     pid_t pid;
652     const char *hook;
653     const char *arg_str = fsfreeze_hook_arg_string[arg];
654     Error *local_err = NULL;
655 
656     hook = ga_fsfreeze_hook(ga_state);
657     if (!hook) {
658         return;
659     }
660     if (access(hook, X_OK) != 0) {
661         error_setg_errno(errp, errno, "can't access fsfreeze hook '%s'", hook);
662         return;
663     }
664 
665     slog("executing fsfreeze hook with arg '%s'", arg_str);
666     pid = fork();
667     if (pid == 0) {
668         setsid();
669         reopen_fd_to_null(0);
670         reopen_fd_to_null(1);
671         reopen_fd_to_null(2);
672 
673         execl(hook, hook, arg_str, NULL);
674         _exit(EXIT_FAILURE);
675     } else if (pid < 0) {
676         error_setg_errno(errp, errno, "failed to create child process");
677         return;
678     }
679 
680     ga_wait_child(pid, &status, &local_err);
681     if (local_err) {
682         error_propagate(errp, local_err);
683         return;
684     }
685 
686     if (!WIFEXITED(status)) {
687         error_setg(errp, "fsfreeze hook has terminated abnormally");
688         return;
689     }
690 
691     status = WEXITSTATUS(status);
692     if (status) {
693         error_setg(errp, "fsfreeze hook has failed with status %d", status);
694         return;
695     }
696 }
697 
698 /*
699  * Return status of freeze/thaw
700  */
701 GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp)
702 {
703     if (ga_is_frozen(ga_state)) {
704         return GUEST_FSFREEZE_STATUS_FROZEN;
705     }
706 
707     return GUEST_FSFREEZE_STATUS_THAWED;
708 }
709 
710 int64_t qmp_guest_fsfreeze_freeze(Error **errp)
711 {
712     return qmp_guest_fsfreeze_freeze_list(false, NULL, errp);
713 }
714 
715 int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints,
716                                        strList *mountpoints,
717                                        Error **errp)
718 {
719     int ret;
720     FsMountList mounts;
721     Error *local_err = NULL;
722 
723     slog("guest-fsfreeze called");
724 
725     execute_fsfreeze_hook(FSFREEZE_HOOK_FREEZE, &local_err);
726     if (local_err) {
727         error_propagate(errp, local_err);
728         return -1;
729     }
730 
731     QTAILQ_INIT(&mounts);
732     if (!build_fs_mount_list(&mounts, &local_err)) {
733         error_propagate(errp, local_err);
734         return -1;
735     }
736 
737     /* cannot risk guest agent blocking itself on a write in this state */
738     ga_set_frozen(ga_state);
739 
740     ret = qmp_guest_fsfreeze_do_freeze_list(has_mountpoints, mountpoints,
741                                             mounts, errp);
742 
743     free_fs_mount_list(&mounts);
744     /* We may not issue any FIFREEZE here.
745      * Just unset ga_state here and ready for the next call.
746      */
747     if (ret == 0) {
748         ga_unset_frozen(ga_state);
749     } else if (ret < 0) {
750         qmp_guest_fsfreeze_thaw(NULL);
751     }
752     return ret;
753 }
754 
755 int64_t qmp_guest_fsfreeze_thaw(Error **errp)
756 {
757     int ret;
758 
759     ret = qmp_guest_fsfreeze_do_thaw(errp);
760     if (ret >= 0) {
761         ga_unset_frozen(ga_state);
762         execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, errp);
763     } else {
764         ret = 0;
765     }
766 
767     return ret;
768 }
769 
770 static void guest_fsfreeze_cleanup(void)
771 {
772     Error *err = NULL;
773 
774     if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) {
775         qmp_guest_fsfreeze_thaw(&err);
776         if (err) {
777             slog("failed to clean up frozen filesystems: %s",
778                  error_get_pretty(err));
779             error_free(err);
780         }
781     }
782 }
783 #endif
784 
785 /* linux-specific implementations. avoid this if at all possible. */
786 #if defined(__linux__)
787 #if defined(CONFIG_FSFREEZE)
788 
789 static char *get_pci_driver(char const *syspath, int pathlen, Error **errp)
790 {
791     char *path;
792     char *dpath;
793     char *driver = NULL;
794     char buf[PATH_MAX];
795     ssize_t len;
796 
797     path = g_strndup(syspath, pathlen);
798     dpath = g_strdup_printf("%s/driver", path);
799     len = readlink(dpath, buf, sizeof(buf) - 1);
800     if (len != -1) {
801         buf[len] = 0;
802         driver = g_path_get_basename(buf);
803     }
804     g_free(dpath);
805     g_free(path);
806     return driver;
807 }
808 
/*
 * qsort() comparator for unsigned int elements in ascending order:
 * returns -1, 0 or 1 for less than, equal and greater than.
 */
static int compare_uint(const void *_a, const void *_b)
{
    const unsigned int *lhs = _a;
    const unsigned int *rhs = _b;

    if (*lhs < *rhs) {
        return -1;
    }
    if (*lhs > *rhs) {
        return 1;
    }
    return 0;
}
816 
817 /* Walk the specified sysfs and build a sorted list of host or ata numbers */
818 static int build_hosts(char const *syspath, char const *host, bool ata,
819                        unsigned int *hosts, int hosts_max, Error **errp)
820 {
821     char *path;
822     DIR *dir;
823     struct dirent *entry;
824     int i = 0;
825 
826     path = g_strndup(syspath, host - syspath);
827     dir = opendir(path);
828     if (!dir) {
829         error_setg_errno(errp, errno, "opendir(\"%s\")", path);
830         g_free(path);
831         return -1;
832     }
833 
834     while (i < hosts_max) {
835         entry = readdir(dir);
836         if (!entry) {
837             break;
838         }
839         if (ata && sscanf(entry->d_name, "ata%d", hosts + i) == 1) {
840             ++i;
841         } else if (!ata && sscanf(entry->d_name, "host%d", hosts + i) == 1) {
842             ++i;
843         }
844     }
845 
846     qsort(hosts, i, sizeof(hosts[0]), compare_uint);
847 
848     g_free(path);
849     closedir(dir);
850     return i;
851 }
852 
853 /*
854  * Store disk device info for devices on the PCI bus.
855  * Returns true if information has been stored, or false for failure.
856  */
857 static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
858                                            GuestDiskAddress *disk,
859                                            Error **errp)
860 {
861     unsigned int pci[4], host, hosts[8], tgt[3];
862     int i, nhosts = 0, pcilen;
863     GuestPCIAddress *pciaddr = disk->pci_controller;
864     bool has_ata = false, has_host = false, has_tgt = false;
865     char *p, *q, *driver = NULL;
866     bool ret = false;
867 
868     p = strstr(syspath, "/devices/pci");
869     if (!p || sscanf(p + 12, "%*x:%*x/%x:%x:%x.%x%n",
870                      pci, pci + 1, pci + 2, pci + 3, &pcilen) < 4) {
871         g_debug("only pci device is supported: sysfs path '%s'", syspath);
872         return false;
873     }
874 
875     p += 12 + pcilen;
876     while (true) {
877         driver = get_pci_driver(syspath, p - syspath, errp);
878         if (driver && (g_str_equal(driver, "ata_piix") ||
879                        g_str_equal(driver, "sym53c8xx") ||
880                        g_str_equal(driver, "virtio-pci") ||
881                        g_str_equal(driver, "ahci") ||
882                        g_str_equal(driver, "nvme") ||
883                        g_str_equal(driver, "xhci_hcd") ||
884                        g_str_equal(driver, "ehci-pci"))) {
885             break;
886         }
887 
888         g_free(driver);
889         if (sscanf(p, "/%x:%x:%x.%x%n",
890                           pci, pci + 1, pci + 2, pci + 3, &pcilen) == 4) {
891             p += pcilen;
892             continue;
893         }
894 
895         g_debug("unsupported driver or sysfs path '%s'", syspath);
896         return false;
897     }
898 
899     p = strstr(syspath, "/target");
900     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
901                     tgt, tgt + 1, tgt + 2) == 3) {
902         has_tgt = true;
903     }
904 
905     p = strstr(syspath, "/ata");
906     if (p) {
907         q = p + 4;
908         has_ata = true;
909     } else {
910         p = strstr(syspath, "/host");
911         q = p + 5;
912     }
913     if (p && sscanf(q, "%u", &host) == 1) {
914         has_host = true;
915         nhosts = build_hosts(syspath, p, has_ata, hosts,
916                              ARRAY_SIZE(hosts), errp);
917         if (nhosts < 0) {
918             goto cleanup;
919         }
920     }
921 
922     pciaddr->domain = pci[0];
923     pciaddr->bus = pci[1];
924     pciaddr->slot = pci[2];
925     pciaddr->function = pci[3];
926 
927     if (strcmp(driver, "ata_piix") == 0) {
928         /* a host per ide bus, target*:0:<unit>:0 */
929         if (!has_host || !has_tgt) {
930             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
931             goto cleanup;
932         }
933         for (i = 0; i < nhosts; i++) {
934             if (host == hosts[i]) {
935                 disk->bus_type = GUEST_DISK_BUS_TYPE_IDE;
936                 disk->bus = i;
937                 disk->unit = tgt[1];
938                 break;
939             }
940         }
941         if (i >= nhosts) {
942             g_debug("no host for '%s' (driver '%s')", syspath, driver);
943             goto cleanup;
944         }
945     } else if (strcmp(driver, "sym53c8xx") == 0) {
946         /* scsi(LSI Logic): target*:0:<unit>:0 */
947         if (!has_tgt) {
948             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
949             goto cleanup;
950         }
951         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
952         disk->unit = tgt[1];
953     } else if (strcmp(driver, "virtio-pci") == 0) {
954         if (has_tgt) {
955             /* virtio-scsi: target*:0:0:<unit> */
956             disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
957             disk->unit = tgt[2];
958         } else {
959             /* virtio-blk: 1 disk per 1 device */
960             disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
961         }
962     } else if (strcmp(driver, "ahci") == 0) {
963         /* ahci: 1 host per 1 unit */
964         if (!has_host || !has_tgt) {
965             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
966             goto cleanup;
967         }
968         for (i = 0; i < nhosts; i++) {
969             if (host == hosts[i]) {
970                 disk->unit = i;
971                 disk->bus_type = GUEST_DISK_BUS_TYPE_SATA;
972                 break;
973             }
974         }
975         if (i >= nhosts) {
976             g_debug("no host for '%s' (driver '%s')", syspath, driver);
977             goto cleanup;
978         }
979     } else if (strcmp(driver, "nvme") == 0) {
980         disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
981     } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) {
982         disk->bus_type = GUEST_DISK_BUS_TYPE_USB;
983     } else {
984         g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
985         goto cleanup;
986     }
987 
988     ret = true;
989 
990 cleanup:
991     g_free(driver);
992     return ret;
993 }
994 
995 /*
996  * Store disk device info for non-PCI virtio devices (for example s390x
997  * channel I/O devices). Returns true if information has been stored, or
998  * false for failure.
999  */
1000 static bool build_guest_fsinfo_for_nonpci_virtio(char const *syspath,
1001                                                  GuestDiskAddress *disk,
1002                                                  Error **errp)
1003 {
1004     unsigned int tgt[3];
1005     char *p;
1006 
1007     if (!strstr(syspath, "/virtio") || !strstr(syspath, "/block")) {
1008         g_debug("Unsupported virtio device '%s'", syspath);
1009         return false;
1010     }
1011 
1012     p = strstr(syspath, "/target");
1013     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
1014                     &tgt[0], &tgt[1], &tgt[2]) == 3) {
1015         /* virtio-scsi: target*:0:<target>:<unit> */
1016         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
1017         disk->bus = tgt[0];
1018         disk->target = tgt[1];
1019         disk->unit = tgt[2];
1020     } else {
1021         /* virtio-blk: 1 disk per 1 device */
1022         disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
1023     }
1024 
1025     return true;
1026 }
1027 
1028 /*
1029  * Store disk device info for CCW devices (s390x channel I/O devices).
1030  * Returns true if information has been stored, or false for failure.
1031  */
1032 static bool build_guest_fsinfo_for_ccw_dev(char const *syspath,
1033                                            GuestDiskAddress *disk,
1034                                            Error **errp)
1035 {
1036     unsigned int cssid, ssid, subchno, devno;
1037     char *p;
1038 
1039     p = strstr(syspath, "/devices/css");
1040     if (!p || sscanf(p + 12, "%*x/%x.%x.%x/%*x.%*x.%x/",
1041                      &cssid, &ssid, &subchno, &devno) < 4) {
1042         g_debug("could not parse ccw device sysfs path: %s", syspath);
1043         return false;
1044     }
1045 
1046     disk->ccw_address = g_new0(GuestCCWAddress, 1);
1047     disk->ccw_address->cssid = cssid;
1048     disk->ccw_address->ssid = ssid;
1049     disk->ccw_address->subchno = subchno;
1050     disk->ccw_address->devno = devno;
1051 
1052     if (strstr(p, "/virtio")) {
1053         build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
1054     }
1055 
1056     return true;
1057 }
1058 
1059 /* Store disk device info specified by @sysfs into @fs */
1060 static void build_guest_fsinfo_for_real_device(char const *syspath,
1061                                                GuestFilesystemInfo *fs,
1062                                                Error **errp)
1063 {
1064     GuestDiskAddress *disk;
1065     GuestPCIAddress *pciaddr;
1066     bool has_hwinf;
1067 #ifdef CONFIG_LIBUDEV
1068     struct udev *udev = NULL;
1069     struct udev_device *udevice = NULL;
1070 #endif
1071 
1072     pciaddr = g_new0(GuestPCIAddress, 1);
1073     pciaddr->domain = -1;                       /* -1 means field is invalid */
1074     pciaddr->bus = -1;
1075     pciaddr->slot = -1;
1076     pciaddr->function = -1;
1077 
1078     disk = g_new0(GuestDiskAddress, 1);
1079     disk->pci_controller = pciaddr;
1080     disk->bus_type = GUEST_DISK_BUS_TYPE_UNKNOWN;
1081 
1082 #ifdef CONFIG_LIBUDEV
1083     udev = udev_new();
1084     udevice = udev_device_new_from_syspath(udev, syspath);
1085     if (udev == NULL || udevice == NULL) {
1086         g_debug("failed to query udev");
1087     } else {
1088         const char *devnode, *serial;
1089         devnode = udev_device_get_devnode(udevice);
1090         if (devnode != NULL) {
1091             disk->dev = g_strdup(devnode);
1092         }
1093         serial = udev_device_get_property_value(udevice, "ID_SERIAL");
1094         if (serial != NULL && *serial != 0) {
1095             disk->serial = g_strdup(serial);
1096         }
1097     }
1098 
1099     udev_unref(udev);
1100     udev_device_unref(udevice);
1101 #endif
1102 
1103     if (strstr(syspath, "/devices/pci")) {
1104         has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp);
1105     } else if (strstr(syspath, "/devices/css")) {
1106         has_hwinf = build_guest_fsinfo_for_ccw_dev(syspath, disk, errp);
1107     } else if (strstr(syspath, "/virtio")) {
1108         has_hwinf = build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
1109     } else {
1110         g_debug("Unsupported device type for '%s'", syspath);
1111         has_hwinf = false;
1112     }
1113 
1114     if (has_hwinf || disk->dev || disk->serial) {
1115         QAPI_LIST_PREPEND(fs->disk, disk);
1116     } else {
1117         qapi_free_GuestDiskAddress(disk);
1118     }
1119 }
1120 
1121 static void build_guest_fsinfo_for_device(char const *devpath,
1122                                           GuestFilesystemInfo *fs,
1123                                           Error **errp);
1124 
1125 /* Store a list of slave devices of virtual volume specified by @syspath into
1126  * @fs */
1127 static void build_guest_fsinfo_for_virtual_device(char const *syspath,
1128                                                   GuestFilesystemInfo *fs,
1129                                                   Error **errp)
1130 {
1131     Error *err = NULL;
1132     DIR *dir;
1133     char *dirpath;
1134     struct dirent *entry;
1135 
1136     dirpath = g_strdup_printf("%s/slaves", syspath);
1137     dir = opendir(dirpath);
1138     if (!dir) {
1139         if (errno != ENOENT) {
1140             error_setg_errno(errp, errno, "opendir(\"%s\")", dirpath);
1141         }
1142         g_free(dirpath);
1143         return;
1144     }
1145 
1146     for (;;) {
1147         errno = 0;
1148         entry = readdir(dir);
1149         if (entry == NULL) {
1150             if (errno) {
1151                 error_setg_errno(errp, errno, "readdir(\"%s\")", dirpath);
1152             }
1153             break;
1154         }
1155 
1156         if (entry->d_type == DT_LNK) {
1157             char *path;
1158 
1159             g_debug(" slave device '%s'", entry->d_name);
1160             path = g_strdup_printf("%s/slaves/%s", syspath, entry->d_name);
1161             build_guest_fsinfo_for_device(path, fs, &err);
1162             g_free(path);
1163 
1164             if (err) {
1165                 error_propagate(errp, err);
1166                 break;
1167             }
1168         }
1169     }
1170 
1171     g_free(dirpath);
1172     closedir(dir);
1173 }
1174 
1175 static bool is_disk_virtual(const char *devpath, Error **errp)
1176 {
1177     g_autofree char *syspath = realpath(devpath, NULL);
1178 
1179     if (!syspath) {
1180         error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
1181         return false;
1182     }
1183     return strstr(syspath, "/devices/virtual/block/") != NULL;
1184 }
1185 
1186 /* Dispatch to functions for virtual/real device */
1187 static void build_guest_fsinfo_for_device(char const *devpath,
1188                                           GuestFilesystemInfo *fs,
1189                                           Error **errp)
1190 {
1191     ERRP_GUARD();
1192     g_autofree char *syspath = NULL;
1193     bool is_virtual = false;
1194 
1195     syspath = realpath(devpath, NULL);
1196     if (!syspath) {
1197         if (errno != ENOENT) {
1198             error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
1199             return;
1200         }
1201 
1202         /* ENOENT: This devpath may not exist because of container config */
1203         if (!fs->name) {
1204             fs->name = g_path_get_basename(devpath);
1205         }
1206         return;
1207     }
1208 
1209     if (!fs->name) {
1210         fs->name = g_path_get_basename(syspath);
1211     }
1212 
1213     g_debug("  parse sysfs path '%s'", syspath);
1214     is_virtual = is_disk_virtual(syspath, errp);
1215     if (*errp != NULL) {
1216         return;
1217     }
1218     if (is_virtual) {
1219         build_guest_fsinfo_for_virtual_device(syspath, fs, errp);
1220     } else {
1221         build_guest_fsinfo_for_real_device(syspath, fs, errp);
1222     }
1223 }
1224 
1225 #ifdef CONFIG_LIBUDEV
1226 
1227 /*
1228  * Wrapper around build_guest_fsinfo_for_device() for getting just
1229  * the disk address.
1230  */
1231 static GuestDiskAddress *get_disk_address(const char *syspath, Error **errp)
1232 {
1233     g_autoptr(GuestFilesystemInfo) fs = NULL;
1234 
1235     fs = g_new0(GuestFilesystemInfo, 1);
1236     build_guest_fsinfo_for_device(syspath, fs, errp);
1237     if (fs->disk != NULL) {
1238         return g_steal_pointer(&fs->disk->value);
1239     }
1240     return NULL;
1241 }
1242 
/*
 * Look up the udev property "DM_NAME" of the device at @syspath.
 *
 * Returns a newly allocated copy of the alias, or NULL when udev cannot be
 * queried or the device has no (or an empty) alias.
 */
static char *get_alias_for_syspath(const char *syspath)
{
    struct udev *udev = udev_new();
    struct udev_device *udevice;
    const char *alias;
    char *name = NULL;

    if (udev == NULL) {
        g_debug("failed to query udev");
        return NULL;
    }

    udevice = udev_device_new_from_syspath(udev, syspath);
    if (udevice == NULL) {
        g_debug("failed to query udev for path: %s", syspath);
        udev_unref(udev);
        return NULL;
    }

    /*
     * NULL means there was an error and empty string means there is no
     * alias. In case of no alias we return NULL instead of empty string.
     */
    alias = udev_device_get_property_value(udevice, "DM_NAME");
    if (alias == NULL) {
        g_debug("failed to query udev for device alias for: %s",
            syspath);
    } else if (alias[0] != '\0') {
        name = g_strdup(alias);
    }

    udev_unref(udev);
    udev_device_unref(udevice);
    return name;
}
1278 
/*
 * Ask udev for the device node of the device at @syspath.
 *
 * Returns a newly allocated copy of the device node, or NULL when udev
 * cannot be queried or reports no device node.
 */
static char *get_device_for_syspath(const char *syspath)
{
    struct udev *udev = udev_new();
    struct udev_device *udevice = NULL;
    char *devnode = NULL;

    if (udev == NULL) {
        g_debug("failed to query udev");
    } else {
        udevice = udev_device_new_from_syspath(udev, syspath);
        if (udevice == NULL) {
            g_debug("failed to query udev for path: %s", syspath);
        } else {
            devnode = g_strdup(udev_device_get_devnode(udevice));
        }
    }

    udev_unref(udev);
    udev_device_unref(udevice);
    return devnode;
}
1303 
1304 static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk)
1305 {
1306     g_autofree char *deps_dir = NULL;
1307     const gchar *dep;
1308     GDir *dp_deps = NULL;
1309 
1310     /* List dependent disks */
1311     deps_dir = g_strdup_printf("%s/slaves", disk_dir);
1312     g_debug("  listing entries in: %s", deps_dir);
1313     dp_deps = g_dir_open(deps_dir, 0, NULL);
1314     if (dp_deps == NULL) {
1315         g_debug("failed to list entries in %s", deps_dir);
1316         return;
1317     }
1318     disk->has_dependencies = true;
1319     while ((dep = g_dir_read_name(dp_deps)) != NULL) {
1320         g_autofree char *dep_dir = NULL;
1321         char *dev_name;
1322 
1323         /* Add dependent disks */
1324         dep_dir = g_strdup_printf("%s/%s", deps_dir, dep);
1325         dev_name = get_device_for_syspath(dep_dir);
1326         if (dev_name != NULL) {
1327             g_debug("  adding dependent device: %s", dev_name);
1328             QAPI_LIST_PREPEND(disk->dependencies, dev_name);
1329         }
1330     }
1331     g_dir_close(dp_deps);
1332 }
1333 
1334 /*
1335  * Detect partitions subdirectory, name is "<disk_name><number>" or
1336  * "<disk_name>p<number>"
1337  *
1338  * @disk_name -- last component of /sys path (e.g. sda)
1339  * @disk_dir -- sys path of the disk (e.g. /sys/block/sda)
1340  * @disk_dev -- device node of the disk (e.g. /dev/sda)
1341  */
1342 static GuestDiskInfoList *get_disk_partitions(
1343     GuestDiskInfoList *list,
1344     const char *disk_name, const char *disk_dir,
1345     const char *disk_dev)
1346 {
1347     GuestDiskInfoList *ret = list;
1348     struct dirent *de_disk;
1349     DIR *dp_disk = NULL;
1350     size_t len = strlen(disk_name);
1351 
1352     dp_disk = opendir(disk_dir);
1353     while ((de_disk = readdir(dp_disk)) != NULL) {
1354         g_autofree char *partition_dir = NULL;
1355         char *dev_name;
1356         GuestDiskInfo *partition;
1357 
1358         if (!(de_disk->d_type & DT_DIR)) {
1359             continue;
1360         }
1361 
1362         if (!(strncmp(disk_name, de_disk->d_name, len) == 0 &&
1363             ((*(de_disk->d_name + len) == 'p' &&
1364             isdigit(*(de_disk->d_name + len + 1))) ||
1365                 isdigit(*(de_disk->d_name + len))))) {
1366             continue;
1367         }
1368 
1369         partition_dir = g_strdup_printf("%s/%s",
1370             disk_dir, de_disk->d_name);
1371         dev_name = get_device_for_syspath(partition_dir);
1372         if (dev_name == NULL) {
1373             g_debug("Failed to get device name for syspath: %s",
1374                 disk_dir);
1375             continue;
1376         }
1377         partition = g_new0(GuestDiskInfo, 1);
1378         partition->name = dev_name;
1379         partition->partition = true;
1380         partition->has_dependencies = true;
1381         /* Add parent disk as dependent for easier tracking of hierarchy */
1382         QAPI_LIST_PREPEND(partition->dependencies, g_strdup(disk_dev));
1383 
1384         QAPI_LIST_PREPEND(ret, partition);
1385     }
1386     closedir(dp_disk);
1387 
1388     return ret;
1389 }
1390 
1391 static void get_nvme_smart(GuestDiskInfo *disk)
1392 {
1393     int fd;
1394     GuestNVMeSmart *smart;
1395     NvmeSmartLog log = {0};
1396     struct nvme_admin_cmd cmd = {
1397         .opcode = NVME_ADM_CMD_GET_LOG_PAGE,
1398         .nsid = NVME_NSID_BROADCAST,
1399         .addr = (uintptr_t)&log,
1400         .data_len = sizeof(log),
1401         .cdw10 = NVME_LOG_SMART_INFO | (1 << 15) /* RAE bit */
1402                  | (((sizeof(log) >> 2) - 1) << 16)
1403     };
1404 
1405     fd = qga_open_cloexec(disk->name, O_RDONLY, 0);
1406     if (fd == -1) {
1407         g_debug("Failed to open device: %s: %s", disk->name, g_strerror(errno));
1408         return;
1409     }
1410 
1411     if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd)) {
1412         g_debug("Failed to get smart: %s: %s", disk->name, g_strerror(errno));
1413         close(fd);
1414         return;
1415     }
1416 
1417     disk->smart = g_new0(GuestDiskSmart, 1);
1418     disk->smart->type = GUEST_DISK_BUS_TYPE_NVME;
1419 
1420     smart = &disk->smart->u.nvme;
1421     smart->critical_warning = log.critical_warning;
1422     smart->temperature = lduw_le_p(&log.temperature); /* unaligned field */
1423     smart->available_spare = log.available_spare;
1424     smart->available_spare_threshold = log.available_spare_threshold;
1425     smart->percentage_used = log.percentage_used;
1426     smart->data_units_read_lo = le64_to_cpu(log.data_units_read[0]);
1427     smart->data_units_read_hi = le64_to_cpu(log.data_units_read[1]);
1428     smart->data_units_written_lo = le64_to_cpu(log.data_units_written[0]);
1429     smart->data_units_written_hi = le64_to_cpu(log.data_units_written[1]);
1430     smart->host_read_commands_lo = le64_to_cpu(log.host_read_commands[0]);
1431     smart->host_read_commands_hi = le64_to_cpu(log.host_read_commands[1]);
1432     smart->host_write_commands_lo = le64_to_cpu(log.host_write_commands[0]);
1433     smart->host_write_commands_hi = le64_to_cpu(log.host_write_commands[1]);
1434     smart->controller_busy_time_lo = le64_to_cpu(log.controller_busy_time[0]);
1435     smart->controller_busy_time_hi = le64_to_cpu(log.controller_busy_time[1]);
1436     smart->power_cycles_lo = le64_to_cpu(log.power_cycles[0]);
1437     smart->power_cycles_hi = le64_to_cpu(log.power_cycles[1]);
1438     smart->power_on_hours_lo = le64_to_cpu(log.power_on_hours[0]);
1439     smart->power_on_hours_hi = le64_to_cpu(log.power_on_hours[1]);
1440     smart->unsafe_shutdowns_lo = le64_to_cpu(log.unsafe_shutdowns[0]);
1441     smart->unsafe_shutdowns_hi = le64_to_cpu(log.unsafe_shutdowns[1]);
1442     smart->media_errors_lo = le64_to_cpu(log.media_errors[0]);
1443     smart->media_errors_hi = le64_to_cpu(log.media_errors[1]);
1444     smart->number_of_error_log_entries_lo =
1445         le64_to_cpu(log.number_of_error_log_entries[0]);
1446     smart->number_of_error_log_entries_hi =
1447         le64_to_cpu(log.number_of_error_log_entries[1]);
1448 
1449     close(fd);
1450 }
1451 
1452 static void get_disk_smart(GuestDiskInfo *disk)
1453 {
1454     if (disk->address
1455         && (disk->address->bus_type == GUEST_DISK_BUS_TYPE_NVME)) {
1456         get_nvme_smart(disk);
1457     }
1458 }
1459 
1460 GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
1461 {
1462     GuestDiskInfoList *ret = NULL;
1463     GuestDiskInfo *disk;
1464     DIR *dp = NULL;
1465     struct dirent *de = NULL;
1466 
1467     g_debug("listing /sys/block directory");
1468     dp = opendir("/sys/block");
1469     if (dp == NULL) {
1470         error_setg_errno(errp, errno, "Can't open directory \"/sys/block\"");
1471         return NULL;
1472     }
1473     while ((de = readdir(dp)) != NULL) {
1474         g_autofree char *disk_dir = NULL, *line = NULL,
1475             *size_path = NULL;
1476         char *dev_name;
1477         Error *local_err = NULL;
1478         if (de->d_type != DT_LNK) {
1479             g_debug("  skipping entry: %s", de->d_name);
1480             continue;
1481         }
1482 
1483         /* Check size and skip zero-sized disks */
1484         g_debug("  checking disk size");
1485         size_path = g_strdup_printf("/sys/block/%s/size", de->d_name);
1486         if (!g_file_get_contents(size_path, &line, NULL, NULL)) {
1487             g_debug("  failed to read disk size");
1488             continue;
1489         }
1490         if (g_strcmp0(line, "0\n") == 0) {
1491             g_debug("  skipping zero-sized disk");
1492             continue;
1493         }
1494 
1495         g_debug("  adding %s", de->d_name);
1496         disk_dir = g_strdup_printf("/sys/block/%s", de->d_name);
1497         dev_name = get_device_for_syspath(disk_dir);
1498         if (dev_name == NULL) {
1499             g_debug("Failed to get device name for syspath: %s",
1500                 disk_dir);
1501             continue;
1502         }
1503         disk = g_new0(GuestDiskInfo, 1);
1504         disk->name = dev_name;
1505         disk->partition = false;
1506         disk->alias = get_alias_for_syspath(disk_dir);
1507         QAPI_LIST_PREPEND(ret, disk);
1508 
1509         /* Get address for non-virtual devices */
1510         bool is_virtual = is_disk_virtual(disk_dir, &local_err);
1511         if (local_err != NULL) {
1512             g_debug("  failed to check disk path, ignoring error: %s",
1513                 error_get_pretty(local_err));
1514             error_free(local_err);
1515             local_err = NULL;
1516             /* Don't try to get the address */
1517             is_virtual = true;
1518         }
1519         if (!is_virtual) {
1520             disk->address = get_disk_address(disk_dir, &local_err);
1521             if (local_err != NULL) {
1522                 g_debug("  failed to get device info, ignoring error: %s",
1523                     error_get_pretty(local_err));
1524                 error_free(local_err);
1525                 local_err = NULL;
1526             }
1527         }
1528 
1529         get_disk_deps(disk_dir, disk);
1530         get_disk_smart(disk);
1531         ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name);
1532     }
1533 
1534     closedir(dp);
1535 
1536     return ret;
1537 }
1538 
1539 #else
1540 
1541 GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
1542 {
1543     error_setg(errp, QERR_UNSUPPORTED);
1544     return NULL;
1545 }
1546 
1547 #endif
1548 
1549 /* Return a list of the disk device(s)' info which @mount lies on */
1550 static GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount,
1551                                                Error **errp)
1552 {
1553     GuestFilesystemInfo *fs = g_malloc0(sizeof(*fs));
1554     struct statvfs buf;
1555     unsigned long used, nonroot_total, fr_size;
1556     char *devpath = g_strdup_printf("/sys/dev/block/%u:%u",
1557                                     mount->devmajor, mount->devminor);
1558 
1559     fs->mountpoint = g_strdup(mount->dirname);
1560     fs->type = g_strdup(mount->devtype);
1561     build_guest_fsinfo_for_device(devpath, fs, errp);
1562 
1563     if (statvfs(fs->mountpoint, &buf) == 0) {
1564         fr_size = buf.f_frsize;
1565         used = buf.f_blocks - buf.f_bfree;
1566         nonroot_total = used + buf.f_bavail;
1567         fs->used_bytes = used * fr_size;
1568         fs->total_bytes = nonroot_total * fr_size;
1569 
1570         fs->has_total_bytes = true;
1571         fs->has_used_bytes = true;
1572     }
1573 
1574     g_free(devpath);
1575 
1576     return fs;
1577 }
1578 
1579 GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
1580 {
1581     FsMountList mounts;
1582     struct FsMount *mount;
1583     GuestFilesystemInfoList *ret = NULL;
1584     Error *local_err = NULL;
1585 
1586     QTAILQ_INIT(&mounts);
1587     if (!build_fs_mount_list(&mounts, &local_err)) {
1588         error_propagate(errp, local_err);
1589         return NULL;
1590     }
1591 
1592     QTAILQ_FOREACH(mount, &mounts, next) {
1593         g_debug("Building guest fsinfo for '%s'", mount->dirname);
1594 
1595         QAPI_LIST_PREPEND(ret, build_guest_fsinfo(mount, &local_err));
1596         if (local_err) {
1597             error_propagate(errp, local_err);
1598             qapi_free_GuestFilesystemInfoList(ret);
1599             ret = NULL;
1600             break;
1601         }
1602     }
1603 
1604     free_fs_mount_list(&mounts);
1605     return ret;
1606 }
1607 #endif /* CONFIG_FSFREEZE */
1608 
1609 #if defined(CONFIG_FSTRIM)
1610 /*
1611  * Walk list of mounted file systems in the guest, and trim them.
1612  */
1613 GuestFilesystemTrimResponse *
1614 qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
1615 {
1616     GuestFilesystemTrimResponse *response;
1617     GuestFilesystemTrimResult *result;
1618     int ret = 0;
1619     FsMountList mounts;
1620     struct FsMount *mount;
1621     int fd;
1622     struct fstrim_range r;
1623 
1624     slog("guest-fstrim called");
1625 
1626     QTAILQ_INIT(&mounts);
1627     if (!build_fs_mount_list(&mounts, errp)) {
1628         return NULL;
1629     }
1630 
1631     response = g_malloc0(sizeof(*response));
1632 
1633     QTAILQ_FOREACH(mount, &mounts, next) {
1634         result = g_malloc0(sizeof(*result));
1635         result->path = g_strdup(mount->dirname);
1636 
1637         QAPI_LIST_PREPEND(response->paths, result);
1638 
1639         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
1640         if (fd == -1) {
1641             result->error = g_strdup_printf("failed to open: %s",
1642                                             strerror(errno));
1643             continue;
1644         }
1645 
1646         /* We try to cull filesystems we know won't work in advance, but other
1647          * filesystems may not implement fstrim for less obvious reasons.
1648          * These will report EOPNOTSUPP; while in some other cases ENOTTY
1649          * will be reported (e.g. CD-ROMs).
1650          * Any other error means an unexpected error.
1651          */
1652         r.start = 0;
1653         r.len = -1;
1654         r.minlen = has_minimum ? minimum : 0;
1655         ret = ioctl(fd, FITRIM, &r);
1656         if (ret == -1) {
1657             if (errno == ENOTTY || errno == EOPNOTSUPP) {
1658                 result->error = g_strdup("trim not supported");
1659             } else {
1660                 result->error = g_strdup_printf("failed to trim: %s",
1661                                                 strerror(errno));
1662             }
1663             close(fd);
1664             continue;
1665         }
1666 
1667         result->has_minimum = true;
1668         result->minimum = r.minlen;
1669         result->has_trimmed = true;
1670         result->trimmed = r.len;
1671         close(fd);
1672     }
1673 
1674     free_fs_mount_list(&mounts);
1675     return response;
1676 }
1677 #endif /* CONFIG_FSTRIM */
1678 
1679 
/* sysfs file used to query and trigger the kernel's suspend states */
#define LINUX_SYS_STATE_FILE "/sys/power/state"

/* Exit status values compared against the result of 'pm-is-supported' */
#define SUSPEND_SUPPORTED 0
#define SUSPEND_NOT_SUPPORTED 1

/*
 * Suspend modes.  The numeric values are used as indexes into the
 * per-backend argument tables, so they must stay 0, 1 and 2.
 */
typedef enum {
    SUSPEND_MODE_DISK   = 0,
    SUSPEND_MODE_RAM    = 1,
    SUSPEND_MODE_HYBRID = 2,
} SuspendMode;
1689 
1690 /*
1691  * Executes a command in a child process using g_spawn_sync,
1692  * returning an int >= 0 representing the exit status of the
1693  * process.
1694  *
1695  * If the program wasn't found in path, returns -1.
1696  *
1697  * If a problem happened when creating the child process,
1698  * returns -1 and errp is set.
1699  */
1700 static int run_process_child(const char *command[], Error **errp)
1701 {
1702     int exit_status, spawn_flag;
1703     GError *g_err = NULL;
1704     bool success;
1705 
1706     spawn_flag = G_SPAWN_SEARCH_PATH | G_SPAWN_STDOUT_TO_DEV_NULL |
1707                  G_SPAWN_STDERR_TO_DEV_NULL;
1708 
1709     success =  g_spawn_sync(NULL, (char **)command, NULL, spawn_flag,
1710                             NULL, NULL, NULL, NULL,
1711                             &exit_status, &g_err);
1712 
1713     if (success) {
1714         return WEXITSTATUS(exit_status);
1715     }
1716 
1717     if (g_err && (g_err->code != G_SPAWN_ERROR_NOENT)) {
1718         error_setg(errp, "failed to create child process, error '%s'",
1719                    g_err->message);
1720     }
1721 
1722     g_error_free(g_err);
1723     return -1;
1724 }
1725 
1726 static bool systemd_supports_mode(SuspendMode mode, Error **errp)
1727 {
1728     const char *systemctl_args[3] = {"systemd-hibernate", "systemd-suspend",
1729                                      "systemd-hybrid-sleep"};
1730     const char *cmd[4] = {"systemctl", "status", systemctl_args[mode], NULL};
1731     int status;
1732 
1733     status = run_process_child(cmd, errp);
1734 
1735     /*
1736      * systemctl status uses LSB return codes so we can expect
1737      * status > 0 and be ok. To assert if the guest has support
1738      * for the selected suspend mode, status should be < 4. 4 is
1739      * the code for unknown service status, the return value when
1740      * the service does not exist. A common value is status = 3
1741      * (program is not running).
1742      */
1743     if (status > 0 && status < 4) {
1744         return true;
1745     }
1746 
1747     return false;
1748 }
1749 
1750 static void systemd_suspend(SuspendMode mode, Error **errp)
1751 {
1752     Error *local_err = NULL;
1753     const char *systemctl_args[3] = {"hibernate", "suspend", "hybrid-sleep"};
1754     const char *cmd[3] = {"systemctl", systemctl_args[mode], NULL};
1755     int status;
1756 
1757     status = run_process_child(cmd, &local_err);
1758 
1759     if (status == 0) {
1760         return;
1761     }
1762 
1763     if ((status == -1) && !local_err) {
1764         error_setg(errp, "the helper program 'systemctl %s' was not found",
1765                    systemctl_args[mode]);
1766         return;
1767     }
1768 
1769     if (local_err) {
1770         error_propagate(errp, local_err);
1771     } else {
1772         error_setg(errp, "the helper program 'systemctl %s' returned an "
1773                    "unexpected exit status code (%d)",
1774                    systemctl_args[mode], status);
1775     }
1776 }
1777 
1778 static bool pmutils_supports_mode(SuspendMode mode, Error **errp)
1779 {
1780     Error *local_err = NULL;
1781     const char *pmutils_args[3] = {"--hibernate", "--suspend",
1782                                    "--suspend-hybrid"};
1783     const char *cmd[3] = {"pm-is-supported", pmutils_args[mode], NULL};
1784     int status;
1785 
1786     status = run_process_child(cmd, &local_err);
1787 
1788     if (status == SUSPEND_SUPPORTED) {
1789         return true;
1790     }
1791 
1792     if ((status == -1) && !local_err) {
1793         return false;
1794     }
1795 
1796     if (local_err) {
1797         error_propagate(errp, local_err);
1798     } else {
1799         error_setg(errp,
1800                    "the helper program '%s' returned an unexpected exit"
1801                    " status code (%d)", "pm-is-supported", status);
1802     }
1803 
1804     return false;
1805 }
1806 
1807 static void pmutils_suspend(SuspendMode mode, Error **errp)
1808 {
1809     Error *local_err = NULL;
1810     const char *pmutils_binaries[3] = {"pm-hibernate", "pm-suspend",
1811                                        "pm-suspend-hybrid"};
1812     const char *cmd[2] = {pmutils_binaries[mode], NULL};
1813     int status;
1814 
1815     status = run_process_child(cmd, &local_err);
1816 
1817     if (status == 0) {
1818         return;
1819     }
1820 
1821     if ((status == -1) && !local_err) {
1822         error_setg(errp, "the helper program '%s' was not found",
1823                    pmutils_binaries[mode]);
1824         return;
1825     }
1826 
1827     if (local_err) {
1828         error_propagate(errp, local_err);
1829     } else {
1830         error_setg(errp,
1831                    "the helper program '%s' returned an unexpected exit"
1832                    " status code (%d)", pmutils_binaries[mode], status);
1833     }
1834 }
1835 
1836 static bool linux_sys_state_supports_mode(SuspendMode mode, Error **errp)
1837 {
1838     const char *sysfile_strs[3] = {"disk", "mem", NULL};
1839     const char *sysfile_str = sysfile_strs[mode];
1840     char buf[32]; /* hopefully big enough */
1841     int fd;
1842     ssize_t ret;
1843 
1844     if (!sysfile_str) {
1845         error_setg(errp, "unknown guest suspend mode");
1846         return false;
1847     }
1848 
1849     fd = open(LINUX_SYS_STATE_FILE, O_RDONLY);
1850     if (fd < 0) {
1851         return false;
1852     }
1853 
1854     ret = read(fd, buf, sizeof(buf) - 1);
1855     close(fd);
1856     if (ret <= 0) {
1857         return false;
1858     }
1859     buf[ret] = '\0';
1860 
1861     if (strstr(buf, sysfile_str)) {
1862         return true;
1863     }
1864     return false;
1865 }
1866 
1867 static void linux_sys_state_suspend(SuspendMode mode, Error **errp)
1868 {
1869     Error *local_err = NULL;
1870     const char *sysfile_strs[3] = {"disk", "mem", NULL};
1871     const char *sysfile_str = sysfile_strs[mode];
1872     pid_t pid;
1873     int status;
1874 
1875     if (!sysfile_str) {
1876         error_setg(errp, "unknown guest suspend mode");
1877         return;
1878     }
1879 
1880     pid = fork();
1881     if (!pid) {
1882         /* child */
1883         int fd;
1884 
1885         setsid();
1886         reopen_fd_to_null(0);
1887         reopen_fd_to_null(1);
1888         reopen_fd_to_null(2);
1889 
1890         fd = open(LINUX_SYS_STATE_FILE, O_WRONLY);
1891         if (fd < 0) {
1892             _exit(EXIT_FAILURE);
1893         }
1894 
1895         if (write(fd, sysfile_str, strlen(sysfile_str)) < 0) {
1896             _exit(EXIT_FAILURE);
1897         }
1898 
1899         _exit(EXIT_SUCCESS);
1900     } else if (pid < 0) {
1901         error_setg_errno(errp, errno, "failed to create child process");
1902         return;
1903     }
1904 
1905     ga_wait_child(pid, &status, &local_err);
1906     if (local_err) {
1907         error_propagate(errp, local_err);
1908         return;
1909     }
1910 
1911     if (WEXITSTATUS(status)) {
1912         error_setg(errp, "child process has failed to suspend");
1913     }
1914 
1915 }
1916 
1917 static void guest_suspend(SuspendMode mode, Error **errp)
1918 {
1919     Error *local_err = NULL;
1920     bool mode_supported = false;
1921 
1922     if (systemd_supports_mode(mode, &local_err)) {
1923         mode_supported = true;
1924         systemd_suspend(mode, &local_err);
1925 
1926         if (!local_err) {
1927             return;
1928         }
1929     }
1930 
1931     error_free(local_err);
1932     local_err = NULL;
1933 
1934     if (pmutils_supports_mode(mode, &local_err)) {
1935         mode_supported = true;
1936         pmutils_suspend(mode, &local_err);
1937 
1938         if (!local_err) {
1939             return;
1940         }
1941     }
1942 
1943     error_free(local_err);
1944     local_err = NULL;
1945 
1946     if (linux_sys_state_supports_mode(mode, &local_err)) {
1947         mode_supported = true;
1948         linux_sys_state_suspend(mode, &local_err);
1949     }
1950 
1951     if (!mode_supported) {
1952         error_free(local_err);
1953         error_setg(errp,
1954                    "the requested suspend mode is not supported by the guest");
1955     } else {
1956         error_propagate(errp, local_err);
1957     }
1958 }
1959 
1960 void qmp_guest_suspend_disk(Error **errp)
1961 {
1962     guest_suspend(SUSPEND_MODE_DISK, errp);
1963 }
1964 
1965 void qmp_guest_suspend_ram(Error **errp)
1966 {
1967     guest_suspend(SUSPEND_MODE_RAM, errp);
1968 }
1969 
1970 void qmp_guest_suspend_hybrid(Error **errp)
1971 {
1972     guest_suspend(SUSPEND_MODE_HYBRID, errp);
1973 }
1974 
1975 /* Transfer online/offline status between @vcpu and the guest system.
1976  *
1977  * On input either @errp or *@errp must be NULL.
1978  *
1979  * In system-to-@vcpu direction, the following @vcpu fields are accessed:
1980  * - R: vcpu->logical_id
1981  * - W: vcpu->online
1982  * - W: vcpu->can_offline
1983  *
1984  * In @vcpu-to-system direction, the following @vcpu fields are accessed:
1985  * - R: vcpu->logical_id
1986  * - R: vcpu->online
1987  *
1988  * Written members remain unmodified on error.
1989  */
1990 static void transfer_vcpu(GuestLogicalProcessor *vcpu, bool sys2vcpu,
1991                           char *dirpath, Error **errp)
1992 {
1993     int fd;
1994     int res;
1995     int dirfd;
1996     static const char fn[] = "online";
1997 
1998     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1999     if (dirfd == -1) {
2000         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
2001         return;
2002     }
2003 
2004     fd = openat(dirfd, fn, sys2vcpu ? O_RDONLY : O_RDWR);
2005     if (fd == -1) {
2006         if (errno != ENOENT) {
2007             error_setg_errno(errp, errno, "open(\"%s/%s\")", dirpath, fn);
2008         } else if (sys2vcpu) {
2009             vcpu->online = true;
2010             vcpu->can_offline = false;
2011         } else if (!vcpu->online) {
2012             error_setg(errp, "logical processor #%" PRId64 " can't be "
2013                        "offlined", vcpu->logical_id);
2014         } /* otherwise pretend successful re-onlining */
2015     } else {
2016         unsigned char status;
2017 
2018         res = pread(fd, &status, 1, 0);
2019         if (res == -1) {
2020             error_setg_errno(errp, errno, "pread(\"%s/%s\")", dirpath, fn);
2021         } else if (res == 0) {
2022             error_setg(errp, "pread(\"%s/%s\"): unexpected EOF", dirpath,
2023                        fn);
2024         } else if (sys2vcpu) {
2025             vcpu->online = (status != '0');
2026             vcpu->can_offline = true;
2027         } else if (vcpu->online != (status != '0')) {
2028             status = '0' + vcpu->online;
2029             if (pwrite(fd, &status, 1, 0) == -1) {
2030                 error_setg_errno(errp, errno, "pwrite(\"%s/%s\")", dirpath,
2031                                  fn);
2032             }
2033         } /* otherwise pretend successful re-(on|off)-lining */
2034 
2035         res = close(fd);
2036         g_assert(res == 0);
2037     }
2038 
2039     res = close(dirfd);
2040     g_assert(res == 0);
2041 }
2042 
2043 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
2044 {
2045     GuestLogicalProcessorList *head, **tail;
2046     const char *cpu_dir = "/sys/devices/system/cpu";
2047     const gchar *line;
2048     g_autoptr(GDir) cpu_gdir = NULL;
2049     Error *local_err = NULL;
2050 
2051     head = NULL;
2052     tail = &head;
2053     cpu_gdir = g_dir_open(cpu_dir, 0, NULL);
2054 
2055     if (cpu_gdir == NULL) {
2056         error_setg_errno(errp, errno, "failed to list entries: %s", cpu_dir);
2057         return NULL;
2058     }
2059 
2060     while (local_err == NULL && (line = g_dir_read_name(cpu_gdir)) != NULL) {
2061         GuestLogicalProcessor *vcpu;
2062         int64_t id;
2063         if (sscanf(line, "cpu%" PRId64, &id)) {
2064             g_autofree char *path = g_strdup_printf("/sys/devices/system/cpu/"
2065                                                     "cpu%" PRId64 "/", id);
2066             vcpu = g_malloc0(sizeof *vcpu);
2067             vcpu->logical_id = id;
2068             vcpu->has_can_offline = true; /* lolspeak ftw */
2069             transfer_vcpu(vcpu, true, path, &local_err);
2070             QAPI_LIST_APPEND(tail, vcpu);
2071         }
2072     }
2073 
2074     if (local_err == NULL) {
2075         /* there's no guest with zero VCPUs */
2076         g_assert(head != NULL);
2077         return head;
2078     }
2079 
2080     qapi_free_GuestLogicalProcessorList(head);
2081     error_propagate(errp, local_err);
2082     return NULL;
2083 }
2084 
2085 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
2086 {
2087     int64_t processed;
2088     Error *local_err = NULL;
2089 
2090     processed = 0;
2091     while (vcpus != NULL) {
2092         char *path = g_strdup_printf("/sys/devices/system/cpu/cpu%" PRId64 "/",
2093                                      vcpus->value->logical_id);
2094 
2095         transfer_vcpu(vcpus->value, false, path, &local_err);
2096         g_free(path);
2097         if (local_err != NULL) {
2098             break;
2099         }
2100         ++processed;
2101         vcpus = vcpus->next;
2102     }
2103 
2104     if (local_err != NULL) {
2105         if (processed == 0) {
2106             error_propagate(errp, local_err);
2107         } else {
2108             error_free(local_err);
2109         }
2110     }
2111 
2112     return processed;
2113 }
2114 #endif /* __linux__ */
2115 
2116 #if defined(__linux__) || defined(__FreeBSD__)
2117 void qmp_guest_set_user_password(const char *username,
2118                                  const char *password,
2119                                  bool crypted,
2120                                  Error **errp)
2121 {
2122     Error *local_err = NULL;
2123     char *passwd_path = NULL;
2124     pid_t pid;
2125     int status;
2126     int datafd[2] = { -1, -1 };
2127     char *rawpasswddata = NULL;
2128     size_t rawpasswdlen;
2129     char *chpasswddata = NULL;
2130     size_t chpasswdlen;
2131 
2132     rawpasswddata = (char *)qbase64_decode(password, -1, &rawpasswdlen, errp);
2133     if (!rawpasswddata) {
2134         return;
2135     }
2136     rawpasswddata = g_renew(char, rawpasswddata, rawpasswdlen + 1);
2137     rawpasswddata[rawpasswdlen] = '\0';
2138 
2139     if (strchr(rawpasswddata, '\n')) {
2140         error_setg(errp, "forbidden characters in raw password");
2141         goto out;
2142     }
2143 
2144     if (strchr(username, '\n') ||
2145         strchr(username, ':')) {
2146         error_setg(errp, "forbidden characters in username");
2147         goto out;
2148     }
2149 
2150 #ifdef __FreeBSD__
2151     chpasswddata = g_strdup(rawpasswddata);
2152     passwd_path = g_find_program_in_path("pw");
2153 #else
2154     chpasswddata = g_strdup_printf("%s:%s\n", username, rawpasswddata);
2155     passwd_path = g_find_program_in_path("chpasswd");
2156 #endif
2157 
2158     chpasswdlen = strlen(chpasswddata);
2159 
2160     if (!passwd_path) {
2161         error_setg(errp, "cannot find 'passwd' program in PATH");
2162         goto out;
2163     }
2164 
2165     if (!g_unix_open_pipe(datafd, FD_CLOEXEC, NULL)) {
2166         error_setg(errp, "cannot create pipe FDs");
2167         goto out;
2168     }
2169 
2170     pid = fork();
2171     if (pid == 0) {
2172         close(datafd[1]);
2173         /* child */
2174         setsid();
2175         dup2(datafd[0], 0);
2176         reopen_fd_to_null(1);
2177         reopen_fd_to_null(2);
2178 
2179 #ifdef __FreeBSD__
2180         const char *h_arg;
2181         h_arg = (crypted) ? "-H" : "-h";
2182         execl(passwd_path, "pw", "usermod", "-n", username, h_arg, "0", NULL);
2183 #else
2184         if (crypted) {
2185             execl(passwd_path, "chpasswd", "-e", NULL);
2186         } else {
2187             execl(passwd_path, "chpasswd", NULL);
2188         }
2189 #endif
2190         _exit(EXIT_FAILURE);
2191     } else if (pid < 0) {
2192         error_setg_errno(errp, errno, "failed to create child process");
2193         goto out;
2194     }
2195     close(datafd[0]);
2196     datafd[0] = -1;
2197 
2198     if (qemu_write_full(datafd[1], chpasswddata, chpasswdlen) != chpasswdlen) {
2199         error_setg_errno(errp, errno, "cannot write new account password");
2200         goto out;
2201     }
2202     close(datafd[1]);
2203     datafd[1] = -1;
2204 
2205     ga_wait_child(pid, &status, &local_err);
2206     if (local_err) {
2207         error_propagate(errp, local_err);
2208         goto out;
2209     }
2210 
2211     if (!WIFEXITED(status)) {
2212         error_setg(errp, "child process has terminated abnormally");
2213         goto out;
2214     }
2215 
2216     if (WEXITSTATUS(status)) {
2217         error_setg(errp, "child process has failed to set user password");
2218         goto out;
2219     }
2220 
2221 out:
2222     g_free(chpasswddata);
2223     g_free(rawpasswddata);
2224     g_free(passwd_path);
2225     if (datafd[0] != -1) {
2226         close(datafd[0]);
2227     }
2228     if (datafd[1] != -1) {
2229         close(datafd[1]);
2230     }
2231 }
2232 #else /* __linux__ || __FreeBSD__ */
2233 void qmp_guest_set_user_password(const char *username,
2234                                  const char *password,
2235                                  bool crypted,
2236                                  Error **errp)
2237 {
2238     error_setg(errp, QERR_UNSUPPORTED);
2239 }
2240 #endif /* __linux__ || __FreeBSD__ */
2241 
2242 #ifdef __linux__
2243 static void ga_read_sysfs_file(int dirfd, const char *pathname, char *buf,
2244                                int size, Error **errp)
2245 {
2246     int fd;
2247     int res;
2248 
2249     errno = 0;
2250     fd = openat(dirfd, pathname, O_RDONLY);
2251     if (fd == -1) {
2252         error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
2253         return;
2254     }
2255 
2256     res = pread(fd, buf, size, 0);
2257     if (res == -1) {
2258         error_setg_errno(errp, errno, "pread sysfs file \"%s\"", pathname);
2259     } else if (res == 0) {
2260         error_setg(errp, "pread sysfs file \"%s\": unexpected EOF", pathname);
2261     }
2262     close(fd);
2263 }
2264 
2265 static void ga_write_sysfs_file(int dirfd, const char *pathname,
2266                                 const char *buf, int size, Error **errp)
2267 {
2268     int fd;
2269 
2270     errno = 0;
2271     fd = openat(dirfd, pathname, O_WRONLY);
2272     if (fd == -1) {
2273         error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
2274         return;
2275     }
2276 
2277     if (pwrite(fd, buf, size, 0) == -1) {
2278         error_setg_errno(errp, errno, "pwrite sysfs file \"%s\"", pathname);
2279     }
2280 
2281     close(fd);
2282 }
2283 
2284 /* Transfer online/offline status between @mem_blk and the guest system.
2285  *
2286  * On input either @errp or *@errp must be NULL.
2287  *
2288  * In system-to-@mem_blk direction, the following @mem_blk fields are accessed:
2289  * - R: mem_blk->phys_index
2290  * - W: mem_blk->online
2291  * - W: mem_blk->can_offline
2292  *
2293  * In @mem_blk-to-system direction, the following @mem_blk fields are accessed:
2294  * - R: mem_blk->phys_index
2295  * - R: mem_blk->online
2296  *-  R: mem_blk->can_offline
2297  * Written members remain unmodified on error.
2298  */
2299 static void transfer_memory_block(GuestMemoryBlock *mem_blk, bool sys2memblk,
2300                                   GuestMemoryBlockResponse *result,
2301                                   Error **errp)
2302 {
2303     char *dirpath;
2304     int dirfd;
2305     char *status;
2306     Error *local_err = NULL;
2307 
2308     if (!sys2memblk) {
2309         DIR *dp;
2310 
2311         if (!result) {
2312             error_setg(errp, "Internal error, 'result' should not be NULL");
2313             return;
2314         }
2315         errno = 0;
2316         dp = opendir("/sys/devices/system/memory/");
2317          /* if there is no 'memory' directory in sysfs,
2318          * we think this VM does not support online/offline memory block,
2319          * any other solution?
2320          */
2321         if (!dp) {
2322             if (errno == ENOENT) {
2323                 result->response =
2324                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
2325             }
2326             goto out1;
2327         }
2328         closedir(dp);
2329     }
2330 
2331     dirpath = g_strdup_printf("/sys/devices/system/memory/memory%" PRId64 "/",
2332                               mem_blk->phys_index);
2333     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
2334     if (dirfd == -1) {
2335         if (sys2memblk) {
2336             error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
2337         } else {
2338             if (errno == ENOENT) {
2339                 result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_NOT_FOUND;
2340             } else {
2341                 result->response =
2342                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
2343             }
2344         }
2345         g_free(dirpath);
2346         goto out1;
2347     }
2348     g_free(dirpath);
2349 
2350     status = g_malloc0(10);
2351     ga_read_sysfs_file(dirfd, "state", status, 10, &local_err);
2352     if (local_err) {
2353         /* treat with sysfs file that not exist in old kernel */
2354         if (errno == ENOENT) {
2355             error_free(local_err);
2356             if (sys2memblk) {
2357                 mem_blk->online = true;
2358                 mem_blk->can_offline = false;
2359             } else if (!mem_blk->online) {
2360                 result->response =
2361                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
2362             }
2363         } else {
2364             if (sys2memblk) {
2365                 error_propagate(errp, local_err);
2366             } else {
2367                 error_free(local_err);
2368                 result->response =
2369                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
2370             }
2371         }
2372         goto out2;
2373     }
2374 
2375     if (sys2memblk) {
2376         char removable = '0';
2377 
2378         mem_blk->online = (strncmp(status, "online", 6) == 0);
2379 
2380         ga_read_sysfs_file(dirfd, "removable", &removable, 1, &local_err);
2381         if (local_err) {
2382             /* if no 'removable' file, it doesn't support offline mem blk */
2383             if (errno == ENOENT) {
2384                 error_free(local_err);
2385                 mem_blk->can_offline = false;
2386             } else {
2387                 error_propagate(errp, local_err);
2388             }
2389         } else {
2390             mem_blk->can_offline = (removable != '0');
2391         }
2392     } else {
2393         if (mem_blk->online != (strncmp(status, "online", 6) == 0)) {
2394             const char *new_state = mem_blk->online ? "online" : "offline";
2395 
2396             ga_write_sysfs_file(dirfd, "state", new_state, strlen(new_state),
2397                                 &local_err);
2398             if (local_err) {
2399                 error_free(local_err);
2400                 result->response =
2401                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
2402                 goto out2;
2403             }
2404 
2405             result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_SUCCESS;
2406             result->has_error_code = false;
2407         } /* otherwise pretend successful re-(on|off)-lining */
2408     }
2409     g_free(status);
2410     close(dirfd);
2411     return;
2412 
2413 out2:
2414     g_free(status);
2415     close(dirfd);
2416 out1:
2417     if (!sys2memblk) {
2418         result->has_error_code = true;
2419         result->error_code = errno;
2420     }
2421 }
2422 
2423 GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
2424 {
2425     GuestMemoryBlockList *head, **tail;
2426     Error *local_err = NULL;
2427     struct dirent *de;
2428     DIR *dp;
2429 
2430     head = NULL;
2431     tail = &head;
2432 
2433     dp = opendir("/sys/devices/system/memory/");
2434     if (!dp) {
2435         /* it's ok if this happens to be a system that doesn't expose
2436          * memory blocks via sysfs, but otherwise we should report
2437          * an error
2438          */
2439         if (errno != ENOENT) {
2440             error_setg_errno(errp, errno, "Can't open directory"
2441                              "\"/sys/devices/system/memory/\"");
2442         }
2443         return NULL;
2444     }
2445 
2446     /* Note: the phys_index of memory block may be discontinuous,
2447      * this is because a memblk is the unit of the Sparse Memory design, which
2448      * allows discontinuous memory ranges (ex. NUMA), so here we should
2449      * traverse the memory block directory.
2450      */
2451     while ((de = readdir(dp)) != NULL) {
2452         GuestMemoryBlock *mem_blk;
2453 
2454         if ((strncmp(de->d_name, "memory", 6) != 0) ||
2455             !(de->d_type & DT_DIR)) {
2456             continue;
2457         }
2458 
2459         mem_blk = g_malloc0(sizeof *mem_blk);
2460         /* The d_name is "memoryXXX",  phys_index is block id, same as XXX */
2461         mem_blk->phys_index = strtoul(&de->d_name[6], NULL, 10);
2462         mem_blk->has_can_offline = true; /* lolspeak ftw */
2463         transfer_memory_block(mem_blk, true, NULL, &local_err);
2464         if (local_err) {
2465             break;
2466         }
2467 
2468         QAPI_LIST_APPEND(tail, mem_blk);
2469     }
2470 
2471     closedir(dp);
2472     if (local_err == NULL) {
2473         /* there's no guest with zero memory blocks */
2474         if (head == NULL) {
2475             error_setg(errp, "guest reported zero memory blocks!");
2476         }
2477         return head;
2478     }
2479 
2480     qapi_free_GuestMemoryBlockList(head);
2481     error_propagate(errp, local_err);
2482     return NULL;
2483 }
2484 
2485 GuestMemoryBlockResponseList *
2486 qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp)
2487 {
2488     GuestMemoryBlockResponseList *head, **tail;
2489     Error *local_err = NULL;
2490 
2491     head = NULL;
2492     tail = &head;
2493 
2494     while (mem_blks != NULL) {
2495         GuestMemoryBlockResponse *result;
2496         GuestMemoryBlock *current_mem_blk = mem_blks->value;
2497 
2498         result = g_malloc0(sizeof(*result));
2499         result->phys_index = current_mem_blk->phys_index;
2500         transfer_memory_block(current_mem_blk, false, result, &local_err);
2501         if (local_err) { /* should never happen */
2502             goto err;
2503         }
2504 
2505         QAPI_LIST_APPEND(tail, result);
2506         mem_blks = mem_blks->next;
2507     }
2508 
2509     return head;
2510 err:
2511     qapi_free_GuestMemoryBlockResponseList(head);
2512     error_propagate(errp, local_err);
2513     return NULL;
2514 }
2515 
2516 GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp)
2517 {
2518     Error *local_err = NULL;
2519     char *dirpath;
2520     int dirfd;
2521     char *buf;
2522     GuestMemoryBlockInfo *info;
2523 
2524     dirpath = g_strdup_printf("/sys/devices/system/memory/");
2525     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
2526     if (dirfd == -1) {
2527         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
2528         g_free(dirpath);
2529         return NULL;
2530     }
2531     g_free(dirpath);
2532 
2533     buf = g_malloc0(20);
2534     ga_read_sysfs_file(dirfd, "block_size_bytes", buf, 20, &local_err);
2535     close(dirfd);
2536     if (local_err) {
2537         g_free(buf);
2538         error_propagate(errp, local_err);
2539         return NULL;
2540     }
2541 
2542     info = g_new0(GuestMemoryBlockInfo, 1);
2543     info->size = strtol(buf, NULL, 16); /* the unit is bytes */
2544 
2545     g_free(buf);
2546 
2547     return info;
2548 }
2549 
2550 #define MAX_NAME_LEN 128
2551 static GuestDiskStatsInfoList *guest_get_diskstats(Error **errp)
2552 {
2553 #ifdef CONFIG_LINUX
2554     GuestDiskStatsInfoList *head = NULL, **tail = &head;
2555     const char *diskstats = "/proc/diskstats";
2556     FILE *fp;
2557     size_t n;
2558     char *line = NULL;
2559 
2560     fp = fopen(diskstats, "r");
2561     if (fp  == NULL) {
2562         error_setg_errno(errp, errno, "open(\"%s\")", diskstats);
2563         return NULL;
2564     }
2565 
2566     while (getline(&line, &n, fp) != -1) {
2567         g_autofree GuestDiskStatsInfo *diskstatinfo = NULL;
2568         g_autofree GuestDiskStats *diskstat = NULL;
2569         char dev_name[MAX_NAME_LEN];
2570         unsigned int ios_pgr, tot_ticks, rq_ticks, wr_ticks, dc_ticks, fl_ticks;
2571         unsigned long rd_ios, rd_merges_or_rd_sec, rd_ticks_or_wr_sec, wr_ios;
2572         unsigned long wr_merges, rd_sec_or_wr_ios, wr_sec;
2573         unsigned long dc_ios, dc_merges, dc_sec, fl_ios;
2574         unsigned int major, minor;
2575         int i;
2576 
2577         i = sscanf(line, "%u %u %s %lu %lu %lu"
2578                    "%lu %lu %lu %lu %u %u %u %u"
2579                    "%lu %lu %lu %u %lu %u",
2580                    &major, &minor, dev_name,
2581                    &rd_ios, &rd_merges_or_rd_sec, &rd_sec_or_wr_ios,
2582                    &rd_ticks_or_wr_sec, &wr_ios, &wr_merges, &wr_sec,
2583                    &wr_ticks, &ios_pgr, &tot_ticks, &rq_ticks,
2584                    &dc_ios, &dc_merges, &dc_sec, &dc_ticks,
2585                    &fl_ios, &fl_ticks);
2586 
2587         if (i < 7) {
2588             continue;
2589         }
2590 
2591         diskstatinfo = g_new0(GuestDiskStatsInfo, 1);
2592         diskstatinfo->name = g_strdup(dev_name);
2593         diskstatinfo->major = major;
2594         diskstatinfo->minor = minor;
2595 
2596         diskstat = g_new0(GuestDiskStats, 1);
2597         if (i == 7) {
2598             diskstat->has_read_ios = true;
2599             diskstat->read_ios = rd_ios;
2600             diskstat->has_read_sectors = true;
2601             diskstat->read_sectors = rd_merges_or_rd_sec;
2602             diskstat->has_write_ios = true;
2603             diskstat->write_ios = rd_sec_or_wr_ios;
2604             diskstat->has_write_sectors = true;
2605             diskstat->write_sectors = rd_ticks_or_wr_sec;
2606         }
2607         if (i >= 14) {
2608             diskstat->has_read_ios = true;
2609             diskstat->read_ios = rd_ios;
2610             diskstat->has_read_sectors = true;
2611             diskstat->read_sectors = rd_sec_or_wr_ios;
2612             diskstat->has_read_merges = true;
2613             diskstat->read_merges = rd_merges_or_rd_sec;
2614             diskstat->has_read_ticks = true;
2615             diskstat->read_ticks = rd_ticks_or_wr_sec;
2616             diskstat->has_write_ios = true;
2617             diskstat->write_ios = wr_ios;
2618             diskstat->has_write_sectors = true;
2619             diskstat->write_sectors = wr_sec;
2620             diskstat->has_write_merges = true;
2621             diskstat->write_merges = wr_merges;
2622             diskstat->has_write_ticks = true;
2623             diskstat->write_ticks = wr_ticks;
2624             diskstat->has_ios_pgr = true;
2625             diskstat->ios_pgr = ios_pgr;
2626             diskstat->has_total_ticks = true;
2627             diskstat->total_ticks = tot_ticks;
2628             diskstat->has_weight_ticks = true;
2629             diskstat->weight_ticks = rq_ticks;
2630         }
2631         if (i >= 18) {
2632             diskstat->has_discard_ios = true;
2633             diskstat->discard_ios = dc_ios;
2634             diskstat->has_discard_merges = true;
2635             diskstat->discard_merges = dc_merges;
2636             diskstat->has_discard_sectors = true;
2637             diskstat->discard_sectors = dc_sec;
2638             diskstat->has_discard_ticks = true;
2639             diskstat->discard_ticks = dc_ticks;
2640         }
2641         if (i >= 20) {
2642             diskstat->has_flush_ios = true;
2643             diskstat->flush_ios = fl_ios;
2644             diskstat->has_flush_ticks = true;
2645             diskstat->flush_ticks = fl_ticks;
2646         }
2647 
2648         diskstatinfo->stats = g_steal_pointer(&diskstat);
2649         QAPI_LIST_APPEND(tail, diskstatinfo);
2650         diskstatinfo = NULL;
2651     }
2652     free(line);
2653     fclose(fp);
2654     return head;
2655 #else
2656     g_debug("disk stats reporting available only for Linux");
2657     return NULL;
2658 #endif
2659 }
2660 
2661 GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
2662 {
2663     return guest_get_diskstats(errp);
2664 }
2665 
2666 GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
2667 {
2668     GuestCpuStatsList *head = NULL, **tail = &head;
2669     const char *cpustats = "/proc/stat";
2670     int clk_tck = sysconf(_SC_CLK_TCK);
2671     FILE *fp;
2672     size_t n;
2673     char *line = NULL;
2674 
2675     fp = fopen(cpustats, "r");
2676     if (fp  == NULL) {
2677         error_setg_errno(errp, errno, "open(\"%s\")", cpustats);
2678         return NULL;
2679     }
2680 
2681     while (getline(&line, &n, fp) != -1) {
2682         GuestCpuStats *cpustat = NULL;
2683         GuestLinuxCpuStats *linuxcpustat;
2684         int i;
2685         unsigned long user, system, idle, iowait, irq, softirq, steal, guest;
2686         unsigned long nice, guest_nice;
2687         char name[64];
2688 
2689         i = sscanf(line, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
2690                    name, &user, &nice, &system, &idle, &iowait, &irq, &softirq,
2691                    &steal, &guest, &guest_nice);
2692 
2693         /* drop "cpu 1 2 3 ...", get "cpuX 1 2 3 ..." only */
2694         if ((i == EOF) || strncmp(name, "cpu", 3) || (name[3] == '\0')) {
2695             continue;
2696         }
2697 
2698         if (i < 5) {
2699             slog("Parsing cpu stat from %s failed, see \"man proc\"", cpustats);
2700             break;
2701         }
2702 
2703         cpustat = g_new0(GuestCpuStats, 1);
2704         cpustat->type = GUEST_CPU_STATS_TYPE_LINUX;
2705 
2706         linuxcpustat = &cpustat->u.q_linux;
2707         linuxcpustat->cpu = atoi(&name[3]);
2708         linuxcpustat->user = user * 1000 / clk_tck;
2709         linuxcpustat->nice = nice * 1000 / clk_tck;
2710         linuxcpustat->system = system * 1000 / clk_tck;
2711         linuxcpustat->idle = idle * 1000 / clk_tck;
2712 
2713         if (i > 5) {
2714             linuxcpustat->has_iowait = true;
2715             linuxcpustat->iowait = iowait * 1000 / clk_tck;
2716         }
2717 
2718         if (i > 6) {
2719             linuxcpustat->has_irq = true;
2720             linuxcpustat->irq = irq * 1000 / clk_tck;
2721             linuxcpustat->has_softirq = true;
2722             linuxcpustat->softirq = softirq * 1000 / clk_tck;
2723         }
2724 
2725         if (i > 8) {
2726             linuxcpustat->has_steal = true;
2727             linuxcpustat->steal = steal * 1000 / clk_tck;
2728         }
2729 
2730         if (i > 9) {
2731             linuxcpustat->has_guest = true;
2732             linuxcpustat->guest = guest * 1000 / clk_tck;
2733         }
2734 
2735         if (i > 10) {
2736             linuxcpustat->has_guest = true;
2737             linuxcpustat->guest = guest * 1000 / clk_tck;
2738             linuxcpustat->has_guestnice = true;
2739             linuxcpustat->guestnice = guest_nice * 1000 / clk_tck;
2740         }
2741 
2742         QAPI_LIST_APPEND(tail, cpustat);
2743     }
2744 
2745     free(line);
2746     fclose(fp);
2747     return head;
2748 }
2749 
2750 #else /* defined(__linux__) */
2751 
2752 void qmp_guest_suspend_disk(Error **errp)
2753 {
2754     error_setg(errp, QERR_UNSUPPORTED);
2755 }
2756 
2757 void qmp_guest_suspend_ram(Error **errp)
2758 {
2759     error_setg(errp, QERR_UNSUPPORTED);
2760 }
2761 
2762 void qmp_guest_suspend_hybrid(Error **errp)
2763 {
2764     error_setg(errp, QERR_UNSUPPORTED);
2765 }
2766 
2767 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
2768 {
2769     error_setg(errp, QERR_UNSUPPORTED);
2770     return NULL;
2771 }
2772 
2773 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
2774 {
2775     error_setg(errp, QERR_UNSUPPORTED);
2776     return -1;
2777 }
2778 
2779 GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
2780 {
2781     error_setg(errp, QERR_UNSUPPORTED);
2782     return NULL;
2783 }
2784 
2785 GuestMemoryBlockResponseList *
2786 qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp)
2787 {
2788     error_setg(errp, QERR_UNSUPPORTED);
2789     return NULL;
2790 }
2791 
2792 GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp)
2793 {
2794     error_setg(errp, QERR_UNSUPPORTED);
2795     return NULL;
2796 }
2797 
2798 #endif
2799 
2800 #ifdef HAVE_GETIFADDRS
2801 static GuestNetworkInterface *
2802 guest_find_interface(GuestNetworkInterfaceList *head,
2803                      const char *name)
2804 {
2805     for (; head; head = head->next) {
2806         if (strcmp(head->value->name, name) == 0) {
2807             return head->value;
2808         }
2809     }
2810 
2811     return NULL;
2812 }
2813 
2814 static int guest_get_network_stats(const char *name,
2815                        GuestNetworkInterfaceStat *stats)
2816 {
2817 #ifdef CONFIG_LINUX
2818     int name_len;
2819     char const *devinfo = "/proc/net/dev";
2820     FILE *fp;
2821     char *line = NULL, *colon;
2822     size_t n = 0;
2823     fp = fopen(devinfo, "r");
2824     if (!fp) {
2825         g_debug("failed to open network stats %s: %s", devinfo,
2826                 g_strerror(errno));
2827         return -1;
2828     }
2829     name_len = strlen(name);
2830     while (getline(&line, &n, fp) != -1) {
2831         long long dummy;
2832         long long rx_bytes;
2833         long long rx_packets;
2834         long long rx_errs;
2835         long long rx_dropped;
2836         long long tx_bytes;
2837         long long tx_packets;
2838         long long tx_errs;
2839         long long tx_dropped;
2840         char *trim_line;
2841         trim_line = g_strchug(line);
2842         if (trim_line[0] == '\0') {
2843             continue;
2844         }
2845         colon = strchr(trim_line, ':');
2846         if (!colon) {
2847             continue;
2848         }
2849         if (colon - name_len  == trim_line &&
2850            strncmp(trim_line, name, name_len) == 0) {
2851             if (sscanf(colon + 1,
2852                 "%lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld",
2853                   &rx_bytes, &rx_packets, &rx_errs, &rx_dropped,
2854                   &dummy, &dummy, &dummy, &dummy,
2855                   &tx_bytes, &tx_packets, &tx_errs, &tx_dropped,
2856                   &dummy, &dummy, &dummy, &dummy) != 16) {
2857                 continue;
2858             }
2859             stats->rx_bytes = rx_bytes;
2860             stats->rx_packets = rx_packets;
2861             stats->rx_errs = rx_errs;
2862             stats->rx_dropped = rx_dropped;
2863             stats->tx_bytes = tx_bytes;
2864             stats->tx_packets = tx_packets;
2865             stats->tx_errs = tx_errs;
2866             stats->tx_dropped = tx_dropped;
2867             fclose(fp);
2868             g_free(line);
2869             return 0;
2870         }
2871     }
2872     fclose(fp);
2873     g_free(line);
2874     g_debug("/proc/net/dev: Interface '%s' not found", name);
2875 #else /* !CONFIG_LINUX */
2876     g_debug("Network stats reporting available only for Linux");
2877 #endif /* !CONFIG_LINUX */
2878     return -1;
2879 }
2880 
2881 #ifndef CONFIG_BSD
2882 /*
2883  * Fill "buf" with MAC address by ifaddrs. Pointer buf must point to a
2884  * buffer with ETHER_ADDR_LEN length at least.
2885  *
2886  * Returns false in case of an error, otherwise true. "obtained" argument
2887  * is true if a MAC address was obtained successful, otherwise false.
2888  */
2889 bool guest_get_hw_addr(struct ifaddrs *ifa, unsigned char *buf,
2890                        bool *obtained, Error **errp)
2891 {
2892     struct ifreq ifr;
2893     int sock;
2894 
2895     *obtained = false;
2896 
2897     /* we haven't obtained HW address yet */
2898     sock = socket(PF_INET, SOCK_STREAM, 0);
2899     if (sock == -1) {
2900         error_setg_errno(errp, errno, "failed to create socket");
2901         return false;
2902     }
2903 
2904     memset(&ifr, 0, sizeof(ifr));
2905     pstrcpy(ifr.ifr_name, IF_NAMESIZE, ifa->ifa_name);
2906     if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) {
2907         /*
2908          * We can't get the hw addr of this interface, but that's not a
2909          * fatal error.
2910          */
2911         if (errno == EADDRNOTAVAIL) {
2912             /* The interface doesn't have a hw addr (e.g. loopback). */
2913             g_debug("failed to get MAC address of %s: %s",
2914                     ifa->ifa_name, strerror(errno));
2915         } else{
2916             g_warning("failed to get MAC address of %s: %s",
2917                       ifa->ifa_name, strerror(errno));
2918         }
2919     } else {
2920 #ifdef CONFIG_SOLARIS
2921         memcpy(buf, &ifr.ifr_addr.sa_data, ETHER_ADDR_LEN);
2922 #else
2923         memcpy(buf, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
2924 #endif
2925         *obtained = true;
2926     }
2927     close(sock);
2928     return true;
2929 }
2930 #endif /* CONFIG_BSD */
2931 
2932 /*
2933  * Build information about guest interfaces
2934  */
2935 GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
2936 {
2937     GuestNetworkInterfaceList *head = NULL, **tail = &head;
2938     struct ifaddrs *ifap, *ifa;
2939 
2940     if (getifaddrs(&ifap) < 0) {
2941         error_setg_errno(errp, errno, "getifaddrs failed");
2942         goto error;
2943     }
2944 
2945     for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
2946         GuestNetworkInterface *info;
2947         GuestIpAddressList **address_tail;
2948         GuestIpAddress *address_item = NULL;
2949         GuestNetworkInterfaceStat *interface_stat = NULL;
2950         char addr4[INET_ADDRSTRLEN];
2951         char addr6[INET6_ADDRSTRLEN];
2952         unsigned char mac_addr[ETHER_ADDR_LEN];
2953         bool obtained;
2954         void *p;
2955 
2956         g_debug("Processing %s interface", ifa->ifa_name);
2957 
2958         info = guest_find_interface(head, ifa->ifa_name);
2959 
2960         if (!info) {
2961             info = g_malloc0(sizeof(*info));
2962             info->name = g_strdup(ifa->ifa_name);
2963 
2964             QAPI_LIST_APPEND(tail, info);
2965         }
2966 
2967         if (!info->hardware_address) {
2968             if (!guest_get_hw_addr(ifa, mac_addr, &obtained, errp)) {
2969                 goto error;
2970             }
2971             if (obtained) {
2972                 info->hardware_address =
2973                     g_strdup_printf("%02x:%02x:%02x:%02x:%02x:%02x",
2974                                     (int) mac_addr[0], (int) mac_addr[1],
2975                                     (int) mac_addr[2], (int) mac_addr[3],
2976                                     (int) mac_addr[4], (int) mac_addr[5]);
2977             }
2978         }
2979 
2980         if (ifa->ifa_addr &&
2981             ifa->ifa_addr->sa_family == AF_INET) {
2982             /* interface with IPv4 address */
2983             p = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
2984             if (!inet_ntop(AF_INET, p, addr4, sizeof(addr4))) {
2985                 error_setg_errno(errp, errno, "inet_ntop failed");
2986                 goto error;
2987             }
2988 
2989             address_item = g_malloc0(sizeof(*address_item));
2990             address_item->ip_address = g_strdup(addr4);
2991             address_item->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV4;
2992 
2993             if (ifa->ifa_netmask) {
2994                 /* Count the number of set bits in netmask.
2995                  * This is safe as '1' and '0' cannot be shuffled in netmask. */
2996                 p = &((struct sockaddr_in *)ifa->ifa_netmask)->sin_addr;
2997                 address_item->prefix = ctpop32(((uint32_t *) p)[0]);
2998             }
2999         } else if (ifa->ifa_addr &&
3000                    ifa->ifa_addr->sa_family == AF_INET6) {
3001             /* interface with IPv6 address */
3002             p = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
3003             if (!inet_ntop(AF_INET6, p, addr6, sizeof(addr6))) {
3004                 error_setg_errno(errp, errno, "inet_ntop failed");
3005                 goto error;
3006             }
3007 
3008             address_item = g_malloc0(sizeof(*address_item));
3009             address_item->ip_address = g_strdup(addr6);
3010             address_item->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV6;
3011 
3012             if (ifa->ifa_netmask) {
3013                 /* Count the number of set bits in netmask.
3014                  * This is safe as '1' and '0' cannot be shuffled in netmask. */
3015                 p = &((struct sockaddr_in6 *)ifa->ifa_netmask)->sin6_addr;
3016                 address_item->prefix =
3017                     ctpop32(((uint32_t *) p)[0]) +
3018                     ctpop32(((uint32_t *) p)[1]) +
3019                     ctpop32(((uint32_t *) p)[2]) +
3020                     ctpop32(((uint32_t *) p)[3]);
3021             }
3022         }
3023 
3024         if (!address_item) {
3025             continue;
3026         }
3027 
3028         address_tail = &info->ip_addresses;
3029         while (*address_tail) {
3030             address_tail = &(*address_tail)->next;
3031         }
3032         QAPI_LIST_APPEND(address_tail, address_item);
3033 
3034         info->has_ip_addresses = true;
3035 
3036         if (!info->statistics) {
3037             interface_stat = g_malloc0(sizeof(*interface_stat));
3038             if (guest_get_network_stats(info->name, interface_stat) == -1) {
3039                 g_free(interface_stat);
3040             } else {
3041                 info->statistics = interface_stat;
3042             }
3043         }
3044     }
3045 
3046     freeifaddrs(ifap);
3047     return head;
3048 
3049 error:
3050     freeifaddrs(ifap);
3051     qapi_free_GuestNetworkInterfaceList(head);
3052     return NULL;
3053 }
3054 
3055 #else
3056 
/*
 * Fallback used in the #else branch of the HAVE_GETIFADDRS conditional:
 * always sets "errp" to QERR_UNSUPPORTED and returns NULL.
 */
GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
{
    error_setg(errp, QERR_UNSUPPORTED);
    return NULL;
}
3062 
3063 #endif /* HAVE_GETIFADDRS */
3064 
3065 #if !defined(CONFIG_FSFREEZE)
3066 
/*
 * Stub compiled when CONFIG_FSFREEZE is not defined: always sets "errp"
 * to QERR_UNSUPPORTED and returns NULL.
 */
GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
{
    error_setg(errp, QERR_UNSUPPORTED);
    return NULL;
}
3072 
/*
 * Stub compiled when CONFIG_FSFREEZE is not defined: always sets "errp"
 * to QERR_UNSUPPORTED. The returned 0 is only a placeholder value.
 */
GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp)
{
    error_setg(errp, QERR_UNSUPPORTED);

    return 0;
}
3079 
/*
 * Stub compiled when CONFIG_FSFREEZE is not defined: always sets "errp"
 * to QERR_UNSUPPORTED. The returned 0 is only a placeholder value.
 */
int64_t qmp_guest_fsfreeze_freeze(Error **errp)
{
    error_setg(errp, QERR_UNSUPPORTED);

    return 0;
}
3086 
/*
 * Stub compiled when CONFIG_FSFREEZE is not defined: ignores both
 * mountpoint arguments, always sets "errp" to QERR_UNSUPPORTED and
 * returns 0 as a placeholder value.
 */
int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints,
                                       strList *mountpoints,
                                       Error **errp)
{
    error_setg(errp, QERR_UNSUPPORTED);

    return 0;
}
3095 
/*
 * Stub compiled when CONFIG_FSFREEZE is not defined: always sets "errp"
 * to QERR_UNSUPPORTED. The returned 0 is only a placeholder value.
 */
int64_t qmp_guest_fsfreeze_thaw(Error **errp)
{
    error_setg(errp, QERR_UNSUPPORTED);

    return 0;
}
3102 
/*
 * Stub compiled when CONFIG_FSFREEZE is not defined: always sets "errp"
 * to QERR_UNSUPPORTED and returns NULL.
 */
GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
{
    error_setg(errp, QERR_UNSUPPORTED);
    return NULL;
}
3108 
/*
 * Stub compiled when CONFIG_FSFREEZE is not defined: always sets "errp"
 * to QERR_UNSUPPORTED and returns NULL.
 */
GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
{
    error_setg(errp, QERR_UNSUPPORTED);
    return NULL;
}
3114 
/*
 * Stub compiled when CONFIG_FSFREEZE is not defined: always sets "errp"
 * to QERR_UNSUPPORTED and returns NULL.
 */
GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
{
    error_setg(errp, QERR_UNSUPPORTED);
    return NULL;
}
3120 
3121 #endif /* CONFIG_FSFREEZE */
3122 
3123 #if !defined(CONFIG_FSTRIM)
/*
 * Stub compiled when CONFIG_FSTRIM is not defined: ignores "minimum",
 * always sets "errp" to QERR_UNSUPPORTED and returns NULL.
 */
GuestFilesystemTrimResponse *
qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
{
    error_setg(errp, QERR_UNSUPPORTED);
    return NULL;
}
3130 #endif
3131 
3132 /* add unsupported commands to the list of blocked RPCs */
3133 GList *ga_command_init_blockedrpcs(GList *blockedrpcs)
3134 {
3135 #if !defined(__linux__)
3136     {
3137         const char *list[] = {
3138             "guest-suspend-disk", "guest-suspend-ram",
3139             "guest-suspend-hybrid", "guest-get-vcpus", "guest-set-vcpus",
3140             "guest-get-memory-blocks", "guest-set-memory-blocks",
3141             "guest-get-memory-block-size", "guest-get-memory-block-info",
3142             NULL};
3143         char **p = (char **)list;
3144 
3145         while (*p) {
3146             blockedrpcs = g_list_append(blockedrpcs, g_strdup(*p++));
3147         }
3148     }
3149 #endif
3150 
3151 #if !defined(HAVE_GETIFADDRS)
3152     blockedrpcs = g_list_append(blockedrpcs,
3153                               g_strdup("guest-network-get-interfaces"));
3154 #endif
3155 
3156 #if !defined(CONFIG_FSFREEZE)
3157     {
3158         const char *list[] = {
3159             "guest-get-fsinfo", "guest-fsfreeze-status",
3160             "guest-fsfreeze-freeze", "guest-fsfreeze-freeze-list",
3161             "guest-fsfreeze-thaw", "guest-get-fsinfo",
3162             "guest-get-disks", NULL};
3163         char **p = (char **)list;
3164 
3165         while (*p) {
3166             blockedrpcs = g_list_append(blockedrpcs, g_strdup(*p++));
3167         }
3168     }
3169 #endif
3170 
3171 #if !defined(CONFIG_FSTRIM)
3172     blockedrpcs = g_list_append(blockedrpcs, g_strdup("guest-fstrim"));
3173 #endif
3174 
3175     blockedrpcs = g_list_append(blockedrpcs, g_strdup("guest-get-devices"));
3176 
3177     return blockedrpcs;
3178 }
3179 
/*
 * Register init/cleanup routines for stateful command groups.
 *
 * With CONFIG_FSFREEZE defined this registers guest_fsfreeze_cleanup as a
 * cleanup routine (with no init routine) on "cs"; otherwise the function
 * does nothing. "s" is not used here.
 */
void ga_command_state_init(GAState *s, GACommandState *cs)
{
#if defined(CONFIG_FSFREEZE)
    ga_command_state_add(cs, NULL, guest_fsfreeze_cleanup);
#endif
}
3187 
3188 #ifdef HAVE_UTMPX
3189 
#define QGA_MICRO_SECOND_TO_SECOND 1000000

/*
 * Return the timestamp stored in a utmpx record as fractional seconds:
 * the tv_sec part plus the tv_usec part scaled down to seconds.
 */
static double ga_get_login_time(struct utmpx *user_info)
{
    const double whole = (double)user_info->ut_tv.tv_sec;
    const double fraction =
        (double)user_info->ut_tv.tv_usec / QGA_MICRO_SECOND_TO_SECOND;

    return whole + fraction;
}
3199 
3200 GuestUserList *qmp_guest_get_users(Error **errp)
3201 {
3202     GHashTable *cache = NULL;
3203     GuestUserList *head = NULL, **tail = &head;
3204     struct utmpx *user_info = NULL;
3205     gpointer value = NULL;
3206     GuestUser *user = NULL;
3207     double login_time = 0;
3208 
3209     cache = g_hash_table_new(g_str_hash, g_str_equal);
3210     setutxent();
3211 
3212     for (;;) {
3213         user_info = getutxent();
3214         if (user_info == NULL) {
3215             break;
3216         } else if (user_info->ut_type != USER_PROCESS) {
3217             continue;
3218         } else if (g_hash_table_contains(cache, user_info->ut_user)) {
3219             value = g_hash_table_lookup(cache, user_info->ut_user);
3220             user = (GuestUser *)value;
3221             login_time = ga_get_login_time(user_info);
3222             /* We're ensuring the earliest login time to be sent */
3223             if (login_time < user->login_time) {
3224                 user->login_time = login_time;
3225             }
3226             continue;
3227         }
3228 
3229         user = g_new0(GuestUser, 1);
3230         user->user = g_strdup(user_info->ut_user);
3231         user->login_time = ga_get_login_time(user_info);
3232 
3233         g_hash_table_insert(cache, user->user, user);
3234 
3235         QAPI_LIST_APPEND(tail, user);
3236     }
3237     endutxent();
3238     g_hash_table_destroy(cache);
3239     return head;
3240 }
3241 
3242 #else
3243 
/*
 * Fallback used when HAVE_UTMPX is not defined: always sets "errp" to
 * QERR_UNSUPPORTED and returns NULL.
 */
GuestUserList *qmp_guest_get_users(Error **errp)
{
    error_setg(errp, QERR_UNSUPPORTED);
    return NULL;
}
3249 
3250 #endif
3251 
/*
 * Decode an os-release value in place; the result is left in "value".
 *
 * - A value that does not start with a single or double quote is cut at
 *   its first space and at its first semicolon.
 * - A quoted value loses its surrounding quotes, and the escapes \$ \' \"
 *   \\ and \` are replaced by the escaped character. A backslash followed
 *   by anything else is kept literally. Decoding stops at the closing
 *   quote, or at the end of the string if the closing quote is missing.
 */
static void ga_osrelease_replace_special(char *value)
{
    char *src, *dst, quote;

    /* Trim the string at first space or semicolon if it is not enclosed in
     * single or double quotes. */
    if (value[0] != '"' && value[0] != '\'') {
        src = strchr(value, ' ');
        if (src != NULL) {
            *src = 0;
        }
        src = strchr(value, ';');
        if (src != NULL) {
            *src = 0;
        }
        return;
    }

    quote = value[0];
    dst = value;
    src = value + 1;
    while (*src != 0 && *src != quote) {
        if (*src == '\\') {
            switch (src[1]) {
            case '$':
            case '\'':
            case '"':
            case '\\':
            case '`':
                /* Drop the backslash, keep the escaped character */
                src++;
                break;
            default:
                /* Keep literal backslash followed by whatever is there */
                break;
            }
        }
        *(dst++) = *(src++);
    }

    /*
     * Always terminate the decoded text, including when the closing quote
     * is missing; otherwise stale trailing bytes would remain visible.
     */
    *dst = 0;
}
3298 
3299 static GKeyFile *ga_parse_osrelease(const char *fname)
3300 {
3301     gchar *content = NULL;
3302     gchar *content2 = NULL;
3303     GError *err = NULL;
3304     GKeyFile *keys = g_key_file_new();
3305     const char *group = "[os-release]\n";
3306 
3307     if (!g_file_get_contents(fname, &content, NULL, &err)) {
3308         slog("failed to read '%s', error: %s", fname, err->message);
3309         goto fail;
3310     }
3311 
3312     if (!g_utf8_validate(content, -1, NULL)) {
3313         slog("file is not utf-8 encoded: %s", fname);
3314         goto fail;
3315     }
3316     content2 = g_strdup_printf("%s%s", group, content);
3317 
3318     if (!g_key_file_load_from_data(keys, content2, -1, G_KEY_FILE_NONE,
3319                                    &err)) {
3320         slog("failed to parse file '%s', error: %s", fname, err->message);
3321         goto fail;
3322     }
3323 
3324     g_free(content);
3325     g_free(content2);
3326     return keys;
3327 
3328 fail:
3329     g_error_free(err);
3330     g_free(content);
3331     g_free(content2);
3332     g_key_file_free(keys);
3333     return NULL;
3334 }
3335 
3336 GuestOSInfo *qmp_guest_get_osinfo(Error **errp)
3337 {
3338     GuestOSInfo *info = NULL;
3339     struct utsname kinfo;
3340     GKeyFile *osrelease = NULL;
3341     const char *qga_os_release = g_getenv("QGA_OS_RELEASE");
3342 
3343     info = g_new0(GuestOSInfo, 1);
3344 
3345     if (uname(&kinfo) != 0) {
3346         error_setg_errno(errp, errno, "uname failed");
3347     } else {
3348         info->kernel_version = g_strdup(kinfo.version);
3349         info->kernel_release = g_strdup(kinfo.release);
3350         info->machine = g_strdup(kinfo.machine);
3351     }
3352 
3353     if (qga_os_release != NULL) {
3354         osrelease = ga_parse_osrelease(qga_os_release);
3355     } else {
3356         osrelease = ga_parse_osrelease("/etc/os-release");
3357         if (osrelease == NULL) {
3358             osrelease = ga_parse_osrelease("/usr/lib/os-release");
3359         }
3360     }
3361 
3362     if (osrelease != NULL) {
3363         char *value;
3364 
3365 #define GET_FIELD(field, osfield) do { \
3366     value = g_key_file_get_value(osrelease, "os-release", osfield, NULL); \
3367     if (value != NULL) { \
3368         ga_osrelease_replace_special(value); \
3369         info->field = value; \
3370     } \
3371 } while (0)
3372         GET_FIELD(id, "ID");
3373         GET_FIELD(name, "NAME");
3374         GET_FIELD(pretty_name, "PRETTY_NAME");
3375         GET_FIELD(version, "VERSION");
3376         GET_FIELD(version_id, "VERSION_ID");
3377         GET_FIELD(variant, "VARIANT");
3378         GET_FIELD(variant_id, "VARIANT_ID");
3379 #undef GET_FIELD
3380 
3381         g_key_file_free(osrelease);
3382     }
3383 
3384     return info;
3385 }
3386 
/*
 * Not implemented in this file: always sets "errp" to QERR_UNSUPPORTED and
 * returns NULL.
 */
GuestDeviceInfoList *qmp_guest_get_devices(Error **errp)
{
    error_setg(errp, QERR_UNSUPPORTED);

    return NULL;
}
3393 
3394 #ifndef HOST_NAME_MAX
3395 # ifdef _POSIX_HOST_NAME_MAX
3396 #  define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
3397 # else
3398 #  define HOST_NAME_MAX 255
3399 # endif
3400 #endif
3401 
3402 char *qga_get_host_name(Error **errp)
3403 {
3404     long len = -1;
3405     g_autofree char *hostname = NULL;
3406 
3407 #ifdef _SC_HOST_NAME_MAX
3408     len = sysconf(_SC_HOST_NAME_MAX);
3409 #endif /* _SC_HOST_NAME_MAX */
3410 
3411     if (len < 0) {
3412         len = HOST_NAME_MAX;
3413     }
3414 
3415     /* Unfortunately, gethostname() below does not guarantee a
3416      * NULL terminated string. Therefore, allocate one byte more
3417      * to be sure. */
3418     hostname = g_new0(char, len + 1);
3419 
3420     if (gethostname(hostname, len) < 0) {
3421         error_setg_errno(errp, errno,
3422                          "cannot get hostname");
3423         return NULL;
3424     }
3425 
3426     return g_steal_pointer(&hostname);
3427 }
3428