xref: /qemu/qga/commands-posix.c (revision b88651cb)
1 /*
2  * QEMU Guest Agent POSIX-specific command implementations
3  *
4  * Copyright IBM Corp. 2011
5  *
6  * Authors:
7  *  Michael Roth      <mdroth@linux.vnet.ibm.com>
8  *  Michal Privoznik  <mprivozn@redhat.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2 or later.
11  * See the COPYING file in the top-level directory.
12  */
13 
14 #include "qemu/osdep.h"
15 #include <sys/ioctl.h>
16 #include <sys/utsname.h>
17 #include <sys/wait.h>
18 #include <dirent.h>
19 #include "guest-agent-core.h"
20 #include "qga-qapi-commands.h"
21 #include "qapi/error.h"
22 #include "qapi/qmp/qerror.h"
23 #include "qemu/queue.h"
24 #include "qemu/host-utils.h"
25 #include "qemu/sockets.h"
26 #include "qemu/base64.h"
27 #include "qemu/cutils.h"
28 #include "commands-common.h"
29 #include "block/nvme.h"
30 
31 #ifdef HAVE_UTMPX
32 #include <utmpx.h>
33 #endif
34 
35 #if defined(__linux__)
36 #include <mntent.h>
37 #include <linux/fs.h>
38 #include <sys/statvfs.h>
39 #include <linux/nvme_ioctl.h>
40 
41 #ifdef CONFIG_LIBUDEV
42 #include <libudev.h>
43 #endif
44 
45 #ifdef FIFREEZE
46 #define CONFIG_FSFREEZE
47 #endif
48 #ifdef FITRIM
49 #define CONFIG_FSTRIM
50 #endif
51 #endif
52 
53 #ifdef HAVE_GETIFADDRS
54 #include <arpa/inet.h>
55 #include <sys/socket.h>
56 #include <net/if.h>
57 #include <sys/types.h>
58 #include <ifaddrs.h>
59 #ifdef CONFIG_SOLARIS
60 #include <sys/sockio.h>
61 #endif
62 #endif
63 
64 static void ga_wait_child(pid_t pid, int *status, Error **errp)
65 {
66     pid_t rpid;
67 
68     *status = 0;
69 
70     do {
71         rpid = waitpid(pid, status, 0);
72     } while (rpid == -1 && errno == EINTR);
73 
74     if (rpid == -1) {
75         error_setg_errno(errp, errno, "failed to wait for child (pid: %d)",
76                          pid);
77         return;
78     }
79 
80     g_assert(rpid == pid);
81 }
82 
83 void qmp_guest_shutdown(bool has_mode, const char *mode, Error **errp)
84 {
85     const char *shutdown_flag;
86     Error *local_err = NULL;
87     pid_t pid;
88     int status;
89 
90 #ifdef CONFIG_SOLARIS
91     const char *powerdown_flag = "-i5";
92     const char *halt_flag = "-i0";
93     const char *reboot_flag = "-i6";
94 #else
95     const char *powerdown_flag = "-P";
96     const char *halt_flag = "-H";
97     const char *reboot_flag = "-r";
98 #endif
99 
100     slog("guest-shutdown called, mode: %s", mode);
101     if (!has_mode || strcmp(mode, "powerdown") == 0) {
102         shutdown_flag = powerdown_flag;
103     } else if (strcmp(mode, "halt") == 0) {
104         shutdown_flag = halt_flag;
105     } else if (strcmp(mode, "reboot") == 0) {
106         shutdown_flag = reboot_flag;
107     } else {
108         error_setg(errp,
109                    "mode is invalid (valid values are: halt|powerdown|reboot");
110         return;
111     }
112 
113     pid = fork();
114     if (pid == 0) {
115         /* child, start the shutdown */
116         setsid();
117         reopen_fd_to_null(0);
118         reopen_fd_to_null(1);
119         reopen_fd_to_null(2);
120 
121 #ifdef CONFIG_SOLARIS
122         execl("/sbin/shutdown", "shutdown", shutdown_flag, "-g0", "-y",
123               "hypervisor initiated shutdown", (char *)NULL);
124 #else
125         execl("/sbin/shutdown", "shutdown", "-h", shutdown_flag, "+0",
126                "hypervisor initiated shutdown", (char *)NULL);
127 #endif
128         _exit(EXIT_FAILURE);
129     } else if (pid < 0) {
130         error_setg_errno(errp, errno, "failed to create child process");
131         return;
132     }
133 
134     ga_wait_child(pid, &status, &local_err);
135     if (local_err) {
136         error_propagate(errp, local_err);
137         return;
138     }
139 
140     if (!WIFEXITED(status)) {
141         error_setg(errp, "child process has terminated abnormally");
142         return;
143     }
144 
145     if (WEXITSTATUS(status)) {
146         error_setg(errp, "child process has failed to shutdown");
147         return;
148     }
149 
150     /* succeeded */
151 }
152 
153 void qmp_guest_set_time(bool has_time, int64_t time_ns, Error **errp)
154 {
155     int ret;
156     int status;
157     pid_t pid;
158     Error *local_err = NULL;
159     struct timeval tv;
160     static const char hwclock_path[] = "/sbin/hwclock";
161     static int hwclock_available = -1;
162 
163     if (hwclock_available < 0) {
164         hwclock_available = (access(hwclock_path, X_OK) == 0);
165     }
166 
167     if (!hwclock_available) {
168         error_setg(errp, QERR_UNSUPPORTED);
169         return;
170     }
171 
172     /* If user has passed a time, validate and set it. */
173     if (has_time) {
174         GDate date = { 0, };
175 
176         /* year-2038 will overflow in case time_t is 32bit */
177         if (time_ns / 1000000000 != (time_t)(time_ns / 1000000000)) {
178             error_setg(errp, "Time %" PRId64 " is too large", time_ns);
179             return;
180         }
181 
182         tv.tv_sec = time_ns / 1000000000;
183         tv.tv_usec = (time_ns % 1000000000) / 1000;
184         g_date_set_time_t(&date, tv.tv_sec);
185         if (date.year < 1970 || date.year >= 2070) {
186             error_setg_errno(errp, errno, "Invalid time");
187             return;
188         }
189 
190         ret = settimeofday(&tv, NULL);
191         if (ret < 0) {
192             error_setg_errno(errp, errno, "Failed to set time to guest");
193             return;
194         }
195     }
196 
197     /* Now, if user has passed a time to set and the system time is set, we
198      * just need to synchronize the hardware clock. However, if no time was
199      * passed, user is requesting the opposite: set the system time from the
200      * hardware clock (RTC). */
201     pid = fork();
202     if (pid == 0) {
203         setsid();
204         reopen_fd_to_null(0);
205         reopen_fd_to_null(1);
206         reopen_fd_to_null(2);
207 
208         /* Use '/sbin/hwclock -w' to set RTC from the system time,
209          * or '/sbin/hwclock -s' to set the system time from RTC. */
210         execl(hwclock_path, "hwclock", has_time ? "-w" : "-s", NULL);
211         _exit(EXIT_FAILURE);
212     } else if (pid < 0) {
213         error_setg_errno(errp, errno, "failed to create child process");
214         return;
215     }
216 
217     ga_wait_child(pid, &status, &local_err);
218     if (local_err) {
219         error_propagate(errp, local_err);
220         return;
221     }
222 
223     if (!WIFEXITED(status)) {
224         error_setg(errp, "child process has terminated abnormally");
225         return;
226     }
227 
228     if (WEXITSTATUS(status)) {
229         error_setg(errp, "hwclock failed to set hardware clock to system time");
230         return;
231     }
232 }
233 
/*
 * Direction of the most recent stdio operation on a guest file handle.
 * The read and write paths consult it to decide whether a flush or a
 * repositioning call is needed before switching direction.
 */
typedef enum {
    RW_STATE_NEW     = 0,   /* no pending direction (fresh, flushed or seeked) */
    RW_STATE_READING = 1,   /* last operation was a read */
    RW_STATE_WRITING = 2,   /* last operation was a write */
} RwState;
239 
/*
 * Bookkeeping for one stdio stream opened on behalf of a client. Entries
 * are linked into guest_file_state.filehandles.
 */
struct GuestFileHandle {
    uint64_t id;                        /* handle value used for lookups */
    FILE *fh;                           /* the underlying stdio stream */
    RwState state;                      /* direction of the last I/O on @fh */
    QTAILQ_ENTRY(GuestFileHandle) next; /* linkage in the handle list */
};

/* File-scope list of all currently registered guest file handles. */
static struct {
    QTAILQ_HEAD(, GuestFileHandle) filehandles;
} guest_file_state = {
    .filehandles = QTAILQ_HEAD_INITIALIZER(guest_file_state.filehandles),
};
252 
253 static int64_t guest_file_handle_add(FILE *fh, Error **errp)
254 {
255     GuestFileHandle *gfh;
256     int64_t handle;
257 
258     handle = ga_get_fd_handle(ga_state, errp);
259     if (handle < 0) {
260         return -1;
261     }
262 
263     gfh = g_new0(GuestFileHandle, 1);
264     gfh->id = handle;
265     gfh->fh = fh;
266     QTAILQ_INSERT_TAIL(&guest_file_state.filehandles, gfh, next);
267 
268     return handle;
269 }
270 
271 GuestFileHandle *guest_file_handle_find(int64_t id, Error **errp)
272 {
273     GuestFileHandle *gfh;
274 
275     QTAILQ_FOREACH(gfh, &guest_file_state.filehandles, next)
276     {
277         if (gfh->id == id) {
278             return gfh;
279         }
280     }
281 
282     error_setg(errp, "handle '%" PRId64 "' has not been found", id);
283     return NULL;
284 }
285 
/* Constant pointer to constant characters: one accepted mode spelling. */
typedef const char * const ccpc;

/* Platforms without O_BINARY get a no-op value so the table can use it. */
#ifndef O_BINARY
#define O_BINARY 0
#endif

/*
 * Translation table from fopen()-style mode strings to open() flags.
 * Each row lists the equivalent spellings of one mode as a NULL-terminated
 * array (@forms) together with the open() flags for it (@oflag_base).
 *
 * http://pubs.opengroup.org/onlinepubs/9699919799/functions/fopen.html
 */
static const struct {
    ccpc *forms;
    int oflag_base;
} guest_file_open_modes[] = {
    { (ccpc[]){ "r",          NULL }, O_RDONLY                                 },
    { (ccpc[]){ "rb",         NULL }, O_RDONLY                      | O_BINARY },
    { (ccpc[]){ "w",          NULL }, O_WRONLY | O_CREAT | O_TRUNC             },
    { (ccpc[]){ "wb",         NULL }, O_WRONLY | O_CREAT | O_TRUNC  | O_BINARY },
    { (ccpc[]){ "a",          NULL }, O_WRONLY | O_CREAT | O_APPEND            },
    { (ccpc[]){ "ab",         NULL }, O_WRONLY | O_CREAT | O_APPEND | O_BINARY },
    { (ccpc[]){ "r+",         NULL }, O_RDWR                                   },
    { (ccpc[]){ "rb+", "r+b", NULL }, O_RDWR                        | O_BINARY },
    { (ccpc[]){ "w+",         NULL }, O_RDWR   | O_CREAT | O_TRUNC             },
    { (ccpc[]){ "wb+", "w+b", NULL }, O_RDWR   | O_CREAT | O_TRUNC  | O_BINARY },
    { (ccpc[]){ "a+",         NULL }, O_RDWR   | O_CREAT | O_APPEND            },
    { (ccpc[]){ "ab+", "a+b", NULL }, O_RDWR   | O_CREAT | O_APPEND | O_BINARY }
};
310 
311 static int
312 find_open_flag(const char *mode_str, Error **errp)
313 {
314     unsigned mode;
315 
316     for (mode = 0; mode < ARRAY_SIZE(guest_file_open_modes); ++mode) {
317         ccpc *form;
318 
319         form = guest_file_open_modes[mode].forms;
320         while (*form != NULL && strcmp(*form, mode_str) != 0) {
321             ++form;
322         }
323         if (*form != NULL) {
324             break;
325         }
326     }
327 
328     if (mode == ARRAY_SIZE(guest_file_open_modes)) {
329         error_setg(errp, "invalid file open mode '%s'", mode_str);
330         return -1;
331     }
332     return guest_file_open_modes[mode].oflag_base | O_NOCTTY | O_NONBLOCK;
333 }
334 
/*
 * Permission bits applied with fchmod() to files this agent creates:
 * read and write for user, group and others.
 */
#define DEFAULT_NEW_FILE_MODE ((S_IRUSR | S_IRGRP | S_IROTH) | \
                               (S_IWUSR | S_IWGRP | S_IWOTH))
338 
339 static FILE *
340 safe_open_or_create(const char *path, const char *mode, Error **errp)
341 {
342     Error *local_err = NULL;
343     int oflag;
344 
345     oflag = find_open_flag(mode, &local_err);
346     if (local_err == NULL) {
347         int fd;
348 
349         /* If the caller wants / allows creation of a new file, we implement it
350          * with a two step process: open() + (open() / fchmod()).
351          *
352          * First we insist on creating the file exclusively as a new file. If
353          * that succeeds, we're free to set any file-mode bits on it. (The
354          * motivation is that we want to set those file-mode bits independently
355          * of the current umask.)
356          *
357          * If the exclusive creation fails because the file already exists
358          * (EEXIST is not possible for any other reason), we just attempt to
359          * open the file, but in this case we won't be allowed to change the
360          * file-mode bits on the preexistent file.
361          *
362          * The pathname should never disappear between the two open()s in
363          * practice. If it happens, then someone very likely tried to race us.
364          * In this case just go ahead and report the ENOENT from the second
365          * open() to the caller.
366          *
367          * If the caller wants to open a preexistent file, then the first
368          * open() is decisive and its third argument is ignored, and the second
369          * open() and the fchmod() are never called.
370          */
371         fd = open(path, oflag | ((oflag & O_CREAT) ? O_EXCL : 0), 0);
372         if (fd == -1 && errno == EEXIST) {
373             oflag &= ~(unsigned)O_CREAT;
374             fd = open(path, oflag);
375         }
376 
377         if (fd == -1) {
378             error_setg_errno(&local_err, errno, "failed to open file '%s' "
379                              "(mode: '%s')", path, mode);
380         } else {
381             qemu_set_cloexec(fd);
382 
383             if ((oflag & O_CREAT) && fchmod(fd, DEFAULT_NEW_FILE_MODE) == -1) {
384                 error_setg_errno(&local_err, errno, "failed to set permission "
385                                  "0%03o on new file '%s' (mode: '%s')",
386                                  (unsigned)DEFAULT_NEW_FILE_MODE, path, mode);
387             } else {
388                 FILE *f;
389 
390                 f = fdopen(fd, mode);
391                 if (f == NULL) {
392                     error_setg_errno(&local_err, errno, "failed to associate "
393                                      "stdio stream with file descriptor %d, "
394                                      "file '%s' (mode: '%s')", fd, path, mode);
395                 } else {
396                     return f;
397                 }
398             }
399 
400             close(fd);
401             if (oflag & O_CREAT) {
402                 unlink(path);
403             }
404         }
405     }
406 
407     error_propagate(errp, local_err);
408     return NULL;
409 }
410 
411 int64_t qmp_guest_file_open(const char *path, bool has_mode, const char *mode,
412                             Error **errp)
413 {
414     FILE *fh;
415     Error *local_err = NULL;
416     int64_t handle;
417 
418     if (!has_mode) {
419         mode = "r";
420     }
421     slog("guest-file-open called, filepath: %s, mode: %s", path, mode);
422     fh = safe_open_or_create(path, mode, &local_err);
423     if (local_err != NULL) {
424         error_propagate(errp, local_err);
425         return -1;
426     }
427 
428     /* set fd non-blocking to avoid common use cases (like reading from a
429      * named pipe) from hanging the agent
430      */
431     if (!g_unix_set_fd_nonblocking(fileno(fh), true, NULL)) {
432         fclose(fh);
433         error_setg_errno(errp, errno, "Failed to set FD nonblocking");
434         return -1;
435     }
436 
437     handle = guest_file_handle_add(fh, errp);
438     if (handle < 0) {
439         fclose(fh);
440         return -1;
441     }
442 
443     slog("guest-file-open, handle: %" PRId64, handle);
444     return handle;
445 }
446 
447 void qmp_guest_file_close(int64_t handle, Error **errp)
448 {
449     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
450     int ret;
451 
452     slog("guest-file-close called, handle: %" PRId64, handle);
453     if (!gfh) {
454         return;
455     }
456 
457     ret = fclose(gfh->fh);
458     if (ret == EOF) {
459         error_setg_errno(errp, errno, "failed to close handle");
460         return;
461     }
462 
463     QTAILQ_REMOVE(&guest_file_state.filehandles, gfh, next);
464     g_free(gfh);
465 }
466 
467 GuestFileRead *guest_file_read_unsafe(GuestFileHandle *gfh,
468                                       int64_t count, Error **errp)
469 {
470     GuestFileRead *read_data = NULL;
471     guchar *buf;
472     FILE *fh = gfh->fh;
473     size_t read_count;
474 
475     /* explicitly flush when switching from writing to reading */
476     if (gfh->state == RW_STATE_WRITING) {
477         int ret = fflush(fh);
478         if (ret == EOF) {
479             error_setg_errno(errp, errno, "failed to flush file");
480             return NULL;
481         }
482         gfh->state = RW_STATE_NEW;
483     }
484 
485     buf = g_malloc0(count + 1);
486     read_count = fread(buf, 1, count, fh);
487     if (ferror(fh)) {
488         error_setg_errno(errp, errno, "failed to read file");
489     } else {
490         buf[read_count] = 0;
491         read_data = g_new0(GuestFileRead, 1);
492         read_data->count = read_count;
493         read_data->eof = feof(fh);
494         if (read_count) {
495             read_data->buf_b64 = g_base64_encode(buf, read_count);
496         }
497         gfh->state = RW_STATE_READING;
498     }
499     g_free(buf);
500     clearerr(fh);
501 
502     return read_data;
503 }
504 
505 GuestFileWrite *qmp_guest_file_write(int64_t handle, const char *buf_b64,
506                                      bool has_count, int64_t count,
507                                      Error **errp)
508 {
509     GuestFileWrite *write_data = NULL;
510     guchar *buf;
511     gsize buf_len;
512     int write_count;
513     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
514     FILE *fh;
515 
516     if (!gfh) {
517         return NULL;
518     }
519 
520     fh = gfh->fh;
521 
522     if (gfh->state == RW_STATE_READING) {
523         int ret = fseek(fh, 0, SEEK_CUR);
524         if (ret == -1) {
525             error_setg_errno(errp, errno, "failed to seek file");
526             return NULL;
527         }
528         gfh->state = RW_STATE_NEW;
529     }
530 
531     buf = qbase64_decode(buf_b64, -1, &buf_len, errp);
532     if (!buf) {
533         return NULL;
534     }
535 
536     if (!has_count) {
537         count = buf_len;
538     } else if (count < 0 || count > buf_len) {
539         error_setg(errp, "value '%" PRId64 "' is invalid for argument count",
540                    count);
541         g_free(buf);
542         return NULL;
543     }
544 
545     write_count = fwrite(buf, 1, count, fh);
546     if (ferror(fh)) {
547         error_setg_errno(errp, errno, "failed to write to file");
548         slog("guest-file-write failed, handle: %" PRId64, handle);
549     } else {
550         write_data = g_new0(GuestFileWrite, 1);
551         write_data->count = write_count;
552         write_data->eof = feof(fh);
553         gfh->state = RW_STATE_WRITING;
554     }
555     g_free(buf);
556     clearerr(fh);
557 
558     return write_data;
559 }
560 
561 struct GuestFileSeek *qmp_guest_file_seek(int64_t handle, int64_t offset,
562                                           GuestFileWhence *whence_code,
563                                           Error **errp)
564 {
565     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
566     GuestFileSeek *seek_data = NULL;
567     FILE *fh;
568     int ret;
569     int whence;
570     Error *err = NULL;
571 
572     if (!gfh) {
573         return NULL;
574     }
575 
576     /* We stupidly exposed 'whence':'int' in our qapi */
577     whence = ga_parse_whence(whence_code, &err);
578     if (err) {
579         error_propagate(errp, err);
580         return NULL;
581     }
582 
583     fh = gfh->fh;
584     ret = fseek(fh, offset, whence);
585     if (ret == -1) {
586         error_setg_errno(errp, errno, "failed to seek file");
587         if (errno == ESPIPE) {
588             /* file is non-seekable, stdio shouldn't be buffering anyways */
589             gfh->state = RW_STATE_NEW;
590         }
591     } else {
592         seek_data = g_new0(GuestFileSeek, 1);
593         seek_data->position = ftell(fh);
594         seek_data->eof = feof(fh);
595         gfh->state = RW_STATE_NEW;
596     }
597     clearerr(fh);
598 
599     return seek_data;
600 }
601 
602 void qmp_guest_file_flush(int64_t handle, Error **errp)
603 {
604     GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
605     FILE *fh;
606     int ret;
607 
608     if (!gfh) {
609         return;
610     }
611 
612     fh = gfh->fh;
613     ret = fflush(fh);
614     if (ret == EOF) {
615         error_setg_errno(errp, errno, "failed to flush file");
616     } else {
617         gfh->state = RW_STATE_NEW;
618     }
619 }
620 
621 /* linux-specific implementations. avoid this if at all possible. */
622 #if defined(__linux__)
623 
624 #if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
/* One mounted file system, as collected by the mount-list builders. */
typedef struct FsMount {
    char *dirname;                      /* mount point path (heap copy) */
    char *devtype;                      /* file system type (heap copy) */
    unsigned int devmajor, devminor;    /* device numbers of the backing device */
    QTAILQ_ENTRY(FsMount) next;         /* linkage in an FsMountList */
} FsMount;

/* Tail queue of FsMount entries. */
typedef QTAILQ_HEAD(FsMountList, FsMount) FsMountList;
633 
634 static void free_fs_mount_list(FsMountList *mounts)
635 {
636      FsMount *mount, *temp;
637 
638      if (!mounts) {
639          return;
640      }
641 
642      QTAILQ_FOREACH_SAFE(mount, mounts, next, temp) {
643          QTAILQ_REMOVE(mounts, mount, next);
644          g_free(mount->dirname);
645          g_free(mount->devtype);
646          g_free(mount);
647      }
648 }
649 
/*
 * Look up the device numbers of the block device node at @devpath.
 *
 * @devmajor and @devminor are always reset to 0 first.
 *
 * Returns 0 and fills both outputs when @devpath is a block device,
 * -2 when it is a directory (treated as a bind mount), and -1 when stat()
 * fails (which is logged) or the path is any other kind of file.
 */
static int dev_major_minor(const char *devpath,
                           unsigned int *devmajor, unsigned int *devminor)
{
    struct stat info;

    *devmajor = 0;
    *devminor = 0;

    if (stat(devpath, &info) < 0) {
        slog("failed to stat device file '%s': %s", devpath, strerror(errno));
        return -1;
    }

    if (S_ISDIR(info.st_mode)) {
        /* It is bind mount */
        return -2;
    }
    if (!S_ISBLK(info.st_mode)) {
        return -1;
    }

    *devmajor = major(info.st_rdev);
    *devminor = minor(info.st_rdev);
    return 0;
}
673 
674 /*
675  * Walk the mount table and build a list of local file systems
676  */
677 static void build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
678 {
679     struct mntent *ment;
680     FsMount *mount;
681     char const *mtab = "/proc/self/mounts";
682     FILE *fp;
683     unsigned int devmajor, devminor;
684 
685     fp = setmntent(mtab, "r");
686     if (!fp) {
687         error_setg(errp, "failed to open mtab file: '%s'", mtab);
688         return;
689     }
690 
691     while ((ment = getmntent(fp))) {
692         /*
693          * An entry which device name doesn't start with a '/' is
694          * either a dummy file system or a network file system.
695          * Add special handling for smbfs and cifs as is done by
696          * coreutils as well.
697          */
698         if ((ment->mnt_fsname[0] != '/') ||
699             (strcmp(ment->mnt_type, "smbfs") == 0) ||
700             (strcmp(ment->mnt_type, "cifs") == 0)) {
701             continue;
702         }
703         if (dev_major_minor(ment->mnt_fsname, &devmajor, &devminor) == -2) {
704             /* Skip bind mounts */
705             continue;
706         }
707 
708         mount = g_new0(FsMount, 1);
709         mount->dirname = g_strdup(ment->mnt_dir);
710         mount->devtype = g_strdup(ment->mnt_type);
711         mount->devmajor = devmajor;
712         mount->devminor = devminor;
713 
714         QTAILQ_INSERT_TAIL(mounts, mount, next);
715     }
716 
717     endmntent(fp);
718 }
719 
/*
 * Decode, in place, the backslash escapes found in a mount table field.
 *
 * "\\" becomes a single backslash, and "\ooo" (a digit 0-3 followed by two
 * digits 0-7) becomes the character with that octal value. Any other
 * backslash is copied unchanged. Indexes 0..@len inclusive are processed,
 * so a terminator stored at name[@len] is carried along with the text.
 */
static void decode_mntname(char *name, int len)
{
    int src = 0;
    int dst = 0;

    while (src <= len) {
        const char *in = &name[src];
        char out = in[0];
        int consumed = 1;

        if (in[0] == '\\') {
            if (in[1] == '\\') {
                /* escaped backslash: out already holds '\\' */
                consumed = 2;
            } else if (in[1] >= '0' && in[1] <= '3' &&
                       in[2] >= '0' && in[2] <= '7' &&
                       in[3] >= '0' && in[3] <= '7') {
                out = (in[1] - '0') * 64 +
                      (in[2] - '0') * 8 +
                      (in[3] - '0');
                consumed = 4;
            }
        }

        /* dst never runs ahead of src, so unread input is not clobbered */
        name[dst++] = out;
        src += consumed;
    }
}
741 
742 static void build_fs_mount_list(FsMountList *mounts, Error **errp)
743 {
744     FsMount *mount;
745     char const *mountinfo = "/proc/self/mountinfo";
746     FILE *fp;
747     char *line = NULL, *dash;
748     size_t n;
749     char check;
750     unsigned int devmajor, devminor;
751     int ret, dir_s, dir_e, type_s, type_e, dev_s, dev_e;
752 
753     fp = fopen(mountinfo, "r");
754     if (!fp) {
755         build_fs_mount_list_from_mtab(mounts, errp);
756         return;
757     }
758 
759     while (getline(&line, &n, fp) != -1) {
760         ret = sscanf(line, "%*u %*u %u:%u %*s %n%*s%n%c",
761                      &devmajor, &devminor, &dir_s, &dir_e, &check);
762         if (ret < 3) {
763             continue;
764         }
765         dash = strstr(line + dir_e, " - ");
766         if (!dash) {
767             continue;
768         }
769         ret = sscanf(dash, " - %n%*s%n %n%*s%n%c",
770                      &type_s, &type_e, &dev_s, &dev_e, &check);
771         if (ret < 1) {
772             continue;
773         }
774         line[dir_e] = 0;
775         dash[type_e] = 0;
776         dash[dev_e] = 0;
777         decode_mntname(line + dir_s, dir_e - dir_s);
778         decode_mntname(dash + dev_s, dev_e - dev_s);
779         if (devmajor == 0) {
780             /* btrfs reports major number = 0 */
781             if (strcmp("btrfs", dash + type_s) != 0 ||
782                 dev_major_minor(dash + dev_s, &devmajor, &devminor) < 0) {
783                 continue;
784             }
785         }
786 
787         mount = g_new0(FsMount, 1);
788         mount->dirname = g_strdup(line + dir_s);
789         mount->devtype = g_strdup(dash + type_s);
790         mount->devmajor = devmajor;
791         mount->devminor = devminor;
792 
793         QTAILQ_INSERT_TAIL(mounts, mount, next);
794     }
795     free(line);
796 
797     fclose(fp);
798 }
799 #endif
800 
801 #if defined(CONFIG_FSFREEZE)
802 
803 static char *get_pci_driver(char const *syspath, int pathlen, Error **errp)
804 {
805     char *path;
806     char *dpath;
807     char *driver = NULL;
808     char buf[PATH_MAX];
809     ssize_t len;
810 
811     path = g_strndup(syspath, pathlen);
812     dpath = g_strdup_printf("%s/driver", path);
813     len = readlink(dpath, buf, sizeof(buf) - 1);
814     if (len != -1) {
815         buf[len] = 0;
816         driver = g_path_get_basename(buf);
817     }
818     g_free(dpath);
819     g_free(path);
820     return driver;
821 }
822 
/*
 * qsort() comparator ordering unsigned int values ascending.
 * Returns -1, 0 or 1.
 */
static int compare_uint(const void *_a, const void *_b)
{
    unsigned int lhs = *(unsigned int *)_a;
    unsigned int rhs = *(unsigned int *)_b;

    /* difference of two 0/1 comparisons; avoids wrap-around of lhs - rhs */
    return (lhs > rhs) - (lhs < rhs);
}
830 
831 /* Walk the specified sysfs and build a sorted list of host or ata numbers */
832 static int build_hosts(char const *syspath, char const *host, bool ata,
833                        unsigned int *hosts, int hosts_max, Error **errp)
834 {
835     char *path;
836     DIR *dir;
837     struct dirent *entry;
838     int i = 0;
839 
840     path = g_strndup(syspath, host - syspath);
841     dir = opendir(path);
842     if (!dir) {
843         error_setg_errno(errp, errno, "opendir(\"%s\")", path);
844         g_free(path);
845         return -1;
846     }
847 
848     while (i < hosts_max) {
849         entry = readdir(dir);
850         if (!entry) {
851             break;
852         }
853         if (ata && sscanf(entry->d_name, "ata%d", hosts + i) == 1) {
854             ++i;
855         } else if (!ata && sscanf(entry->d_name, "host%d", hosts + i) == 1) {
856             ++i;
857         }
858     }
859 
860     qsort(hosts, i, sizeof(hosts[0]), compare_uint);
861 
862     g_free(path);
863     closedir(dir);
864     return i;
865 }
866 
867 /*
868  * Store disk device info for devices on the PCI bus.
869  * Returns true if information has been stored, or false for failure.
870  */
871 static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
872                                            GuestDiskAddress *disk,
873                                            Error **errp)
874 {
875     unsigned int pci[4], host, hosts[8], tgt[3];
876     int i, nhosts = 0, pcilen;
877     GuestPCIAddress *pciaddr = disk->pci_controller;
878     bool has_ata = false, has_host = false, has_tgt = false;
879     char *p, *q, *driver = NULL;
880     bool ret = false;
881 
882     p = strstr(syspath, "/devices/pci");
883     if (!p || sscanf(p + 12, "%*x:%*x/%x:%x:%x.%x%n",
884                      pci, pci + 1, pci + 2, pci + 3, &pcilen) < 4) {
885         g_debug("only pci device is supported: sysfs path '%s'", syspath);
886         return false;
887     }
888 
889     p += 12 + pcilen;
890     while (true) {
891         driver = get_pci_driver(syspath, p - syspath, errp);
892         if (driver && (g_str_equal(driver, "ata_piix") ||
893                        g_str_equal(driver, "sym53c8xx") ||
894                        g_str_equal(driver, "virtio-pci") ||
895                        g_str_equal(driver, "ahci") ||
896                        g_str_equal(driver, "nvme"))) {
897             break;
898         }
899 
900         g_free(driver);
901         if (sscanf(p, "/%x:%x:%x.%x%n",
902                           pci, pci + 1, pci + 2, pci + 3, &pcilen) == 4) {
903             p += pcilen;
904             continue;
905         }
906 
907         g_debug("unsupported driver or sysfs path '%s'", syspath);
908         return false;
909     }
910 
911     p = strstr(syspath, "/target");
912     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
913                     tgt, tgt + 1, tgt + 2) == 3) {
914         has_tgt = true;
915     }
916 
917     p = strstr(syspath, "/ata");
918     if (p) {
919         q = p + 4;
920         has_ata = true;
921     } else {
922         p = strstr(syspath, "/host");
923         q = p + 5;
924     }
925     if (p && sscanf(q, "%u", &host) == 1) {
926         has_host = true;
927         nhosts = build_hosts(syspath, p, has_ata, hosts,
928                              ARRAY_SIZE(hosts), errp);
929         if (nhosts < 0) {
930             goto cleanup;
931         }
932     }
933 
934     pciaddr->domain = pci[0];
935     pciaddr->bus = pci[1];
936     pciaddr->slot = pci[2];
937     pciaddr->function = pci[3];
938 
939     if (strcmp(driver, "ata_piix") == 0) {
940         /* a host per ide bus, target*:0:<unit>:0 */
941         if (!has_host || !has_tgt) {
942             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
943             goto cleanup;
944         }
945         for (i = 0; i < nhosts; i++) {
946             if (host == hosts[i]) {
947                 disk->bus_type = GUEST_DISK_BUS_TYPE_IDE;
948                 disk->bus = i;
949                 disk->unit = tgt[1];
950                 break;
951             }
952         }
953         if (i >= nhosts) {
954             g_debug("no host for '%s' (driver '%s')", syspath, driver);
955             goto cleanup;
956         }
957     } else if (strcmp(driver, "sym53c8xx") == 0) {
958         /* scsi(LSI Logic): target*:0:<unit>:0 */
959         if (!has_tgt) {
960             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
961             goto cleanup;
962         }
963         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
964         disk->unit = tgt[1];
965     } else if (strcmp(driver, "virtio-pci") == 0) {
966         if (has_tgt) {
967             /* virtio-scsi: target*:0:0:<unit> */
968             disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
969             disk->unit = tgt[2];
970         } else {
971             /* virtio-blk: 1 disk per 1 device */
972             disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
973         }
974     } else if (strcmp(driver, "ahci") == 0) {
975         /* ahci: 1 host per 1 unit */
976         if (!has_host || !has_tgt) {
977             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
978             goto cleanup;
979         }
980         for (i = 0; i < nhosts; i++) {
981             if (host == hosts[i]) {
982                 disk->unit = i;
983                 disk->bus_type = GUEST_DISK_BUS_TYPE_SATA;
984                 break;
985             }
986         }
987         if (i >= nhosts) {
988             g_debug("no host for '%s' (driver '%s')", syspath, driver);
989             goto cleanup;
990         }
991     } else if (strcmp(driver, "nvme") == 0) {
992         disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
993     } else {
994         g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
995         goto cleanup;
996     }
997 
998     ret = true;
999 
1000 cleanup:
1001     g_free(driver);
1002     return ret;
1003 }
1004 
1005 /*
1006  * Store disk device info for non-PCI virtio devices (for example s390x
1007  * channel I/O devices). Returns true if information has been stored, or
1008  * false for failure.
1009  */
1010 static bool build_guest_fsinfo_for_nonpci_virtio(char const *syspath,
1011                                                  GuestDiskAddress *disk,
1012                                                  Error **errp)
1013 {
1014     unsigned int tgt[3];
1015     char *p;
1016 
1017     if (!strstr(syspath, "/virtio") || !strstr(syspath, "/block")) {
1018         g_debug("Unsupported virtio device '%s'", syspath);
1019         return false;
1020     }
1021 
1022     p = strstr(syspath, "/target");
1023     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
1024                     &tgt[0], &tgt[1], &tgt[2]) == 3) {
1025         /* virtio-scsi: target*:0:<target>:<unit> */
1026         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
1027         disk->bus = tgt[0];
1028         disk->target = tgt[1];
1029         disk->unit = tgt[2];
1030     } else {
1031         /* virtio-blk: 1 disk per 1 device */
1032         disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
1033     }
1034 
1035     return true;
1036 }
1037 
1038 /*
1039  * Store disk device info for CCW devices (s390x channel I/O devices).
1040  * Returns true if information has been stored, or false for failure.
1041  */
1042 static bool build_guest_fsinfo_for_ccw_dev(char const *syspath,
1043                                            GuestDiskAddress *disk,
1044                                            Error **errp)
1045 {
1046     unsigned int cssid, ssid, subchno, devno;
1047     char *p;
1048 
1049     p = strstr(syspath, "/devices/css");
1050     if (!p || sscanf(p + 12, "%*x/%x.%x.%x/%*x.%*x.%x/",
1051                      &cssid, &ssid, &subchno, &devno) < 4) {
1052         g_debug("could not parse ccw device sysfs path: %s", syspath);
1053         return false;
1054     }
1055 
1056     disk->has_ccw_address = true;
1057     disk->ccw_address = g_new0(GuestCCWAddress, 1);
1058     disk->ccw_address->cssid = cssid;
1059     disk->ccw_address->ssid = ssid;
1060     disk->ccw_address->subchno = subchno;
1061     disk->ccw_address->devno = devno;
1062 
1063     if (strstr(p, "/virtio")) {
1064         build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
1065     }
1066 
1067     return true;
1068 }
1069 
1070 /* Store disk device info specified by @sysfs into @fs */
1071 static void build_guest_fsinfo_for_real_device(char const *syspath,
1072                                                GuestFilesystemInfo *fs,
1073                                                Error **errp)
1074 {
1075     GuestDiskAddress *disk;
1076     GuestPCIAddress *pciaddr;
1077     bool has_hwinf;
1078 #ifdef CONFIG_LIBUDEV
1079     struct udev *udev = NULL;
1080     struct udev_device *udevice = NULL;
1081 #endif
1082 
1083     pciaddr = g_new0(GuestPCIAddress, 1);
1084     pciaddr->domain = -1;                       /* -1 means field is invalid */
1085     pciaddr->bus = -1;
1086     pciaddr->slot = -1;
1087     pciaddr->function = -1;
1088 
1089     disk = g_new0(GuestDiskAddress, 1);
1090     disk->pci_controller = pciaddr;
1091     disk->bus_type = GUEST_DISK_BUS_TYPE_UNKNOWN;
1092 
1093 #ifdef CONFIG_LIBUDEV
1094     udev = udev_new();
1095     udevice = udev_device_new_from_syspath(udev, syspath);
1096     if (udev == NULL || udevice == NULL) {
1097         g_debug("failed to query udev");
1098     } else {
1099         const char *devnode, *serial;
1100         devnode = udev_device_get_devnode(udevice);
1101         if (devnode != NULL) {
1102             disk->dev = g_strdup(devnode);
1103             disk->has_dev = true;
1104         }
1105         serial = udev_device_get_property_value(udevice, "ID_SERIAL");
1106         if (serial != NULL && *serial != 0) {
1107             disk->serial = g_strdup(serial);
1108             disk->has_serial = true;
1109         }
1110     }
1111 
1112     udev_unref(udev);
1113     udev_device_unref(udevice);
1114 #endif
1115 
1116     if (strstr(syspath, "/devices/pci")) {
1117         has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp);
1118     } else if (strstr(syspath, "/devices/css")) {
1119         has_hwinf = build_guest_fsinfo_for_ccw_dev(syspath, disk, errp);
1120     } else if (strstr(syspath, "/virtio")) {
1121         has_hwinf = build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
1122     } else {
1123         g_debug("Unsupported device type for '%s'", syspath);
1124         has_hwinf = false;
1125     }
1126 
1127     if (has_hwinf || disk->has_dev || disk->has_serial) {
1128         QAPI_LIST_PREPEND(fs->disk, disk);
1129     } else {
1130         qapi_free_GuestDiskAddress(disk);
1131     }
1132 }
1133 
1134 static void build_guest_fsinfo_for_device(char const *devpath,
1135                                           GuestFilesystemInfo *fs,
1136                                           Error **errp);
1137 
1138 /* Store a list of slave devices of virtual volume specified by @syspath into
1139  * @fs */
1140 static void build_guest_fsinfo_for_virtual_device(char const *syspath,
1141                                                   GuestFilesystemInfo *fs,
1142                                                   Error **errp)
1143 {
1144     Error *err = NULL;
1145     DIR *dir;
1146     char *dirpath;
1147     struct dirent *entry;
1148 
1149     dirpath = g_strdup_printf("%s/slaves", syspath);
1150     dir = opendir(dirpath);
1151     if (!dir) {
1152         if (errno != ENOENT) {
1153             error_setg_errno(errp, errno, "opendir(\"%s\")", dirpath);
1154         }
1155         g_free(dirpath);
1156         return;
1157     }
1158 
1159     for (;;) {
1160         errno = 0;
1161         entry = readdir(dir);
1162         if (entry == NULL) {
1163             if (errno) {
1164                 error_setg_errno(errp, errno, "readdir(\"%s\")", dirpath);
1165             }
1166             break;
1167         }
1168 
1169         if (entry->d_type == DT_LNK) {
1170             char *path;
1171 
1172             g_debug(" slave device '%s'", entry->d_name);
1173             path = g_strdup_printf("%s/slaves/%s", syspath, entry->d_name);
1174             build_guest_fsinfo_for_device(path, fs, &err);
1175             g_free(path);
1176 
1177             if (err) {
1178                 error_propagate(errp, err);
1179                 break;
1180             }
1181         }
1182     }
1183 
1184     g_free(dirpath);
1185     closedir(dir);
1186 }
1187 
1188 static bool is_disk_virtual(const char *devpath, Error **errp)
1189 {
1190     g_autofree char *syspath = realpath(devpath, NULL);
1191 
1192     if (!syspath) {
1193         error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
1194         return false;
1195     }
1196     return strstr(syspath, "/devices/virtual/block/") != NULL;
1197 }
1198 
1199 /* Dispatch to functions for virtual/real device */
1200 static void build_guest_fsinfo_for_device(char const *devpath,
1201                                           GuestFilesystemInfo *fs,
1202                                           Error **errp)
1203 {
1204     ERRP_GUARD();
1205     g_autofree char *syspath = NULL;
1206     bool is_virtual = false;
1207 
1208     syspath = realpath(devpath, NULL);
1209     if (!syspath) {
1210         error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
1211         return;
1212     }
1213 
1214     if (!fs->name) {
1215         fs->name = g_path_get_basename(syspath);
1216     }
1217 
1218     g_debug("  parse sysfs path '%s'", syspath);
1219     is_virtual = is_disk_virtual(syspath, errp);
1220     if (*errp != NULL) {
1221         return;
1222     }
1223     if (is_virtual) {
1224         build_guest_fsinfo_for_virtual_device(syspath, fs, errp);
1225     } else {
1226         build_guest_fsinfo_for_real_device(syspath, fs, errp);
1227     }
1228 }
1229 
1230 #ifdef CONFIG_LIBUDEV
1231 
1232 /*
1233  * Wrapper around build_guest_fsinfo_for_device() for getting just
1234  * the disk address.
1235  */
1236 static GuestDiskAddress *get_disk_address(const char *syspath, Error **errp)
1237 {
1238     g_autoptr(GuestFilesystemInfo) fs = NULL;
1239 
1240     fs = g_new0(GuestFilesystemInfo, 1);
1241     build_guest_fsinfo_for_device(syspath, fs, errp);
1242     if (fs->disk != NULL) {
1243         return g_steal_pointer(&fs->disk->value);
1244     }
1245     return NULL;
1246 }
1247 
/*
 * Look up the udev "DM_NAME" property of the device at @syspath.
 *
 * Returns a newly allocated copy of the property value, or NULL when udev
 * cannot be queried, the property is missing, or the property is empty.
 */
static char *get_alias_for_syspath(const char *syspath)
{
    struct udev_device *dev;
    struct udev *ctx;
    const char *dm_name;
    char *alias = NULL;

    ctx = udev_new();
    if (ctx == NULL) {
        g_debug("failed to query udev");
        return NULL;
    }

    dev = udev_device_new_from_syspath(ctx, syspath);
    if (dev == NULL) {
        g_debug("failed to query udev for path: %s", syspath);
        udev_unref(ctx);
        return NULL;
    }

    dm_name = udev_device_get_property_value(dev, "DM_NAME");
    /*
     * NULL means there was an error and empty string means there is no
     * alias. In case of no alias we return NULL instead of empty string.
     */
    if (dm_name == NULL) {
        g_debug("failed to query udev for device alias for: %s",
            syspath);
    } else if (dm_name[0] != '\0') {
        alias = g_strdup(dm_name);
    }

    udev_unref(ctx);
    udev_device_unref(dev);
    return alias;
}
1283 
/*
 * Ask udev for the device node of the device at @syspath.
 *
 * Returns a newly allocated copy of the node name, or NULL when udev
 * cannot be queried or reports no node for the device.
 */
static char *get_device_for_syspath(const char *syspath)
{
    struct udev_device *dev;
    struct udev *ctx;
    char *devnode;

    ctx = udev_new();
    if (ctx == NULL) {
        g_debug("failed to query udev");
        return NULL;
    }

    dev = udev_device_new_from_syspath(ctx, syspath);
    if (dev == NULL) {
        g_debug("failed to query udev for path: %s", syspath);
        udev_unref(ctx);
        return NULL;
    }

    /* g_strdup() passes a NULL node name through as NULL. */
    devnode = g_strdup(udev_device_get_devnode(dev));

    udev_unref(ctx);
    udev_device_unref(dev);
    return devnode;
}
1308 
1309 static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk)
1310 {
1311     g_autofree char *deps_dir = NULL;
1312     const gchar *dep;
1313     GDir *dp_deps = NULL;
1314 
1315     /* List dependent disks */
1316     deps_dir = g_strdup_printf("%s/slaves", disk_dir);
1317     g_debug("  listing entries in: %s", deps_dir);
1318     dp_deps = g_dir_open(deps_dir, 0, NULL);
1319     if (dp_deps == NULL) {
1320         g_debug("failed to list entries in %s", deps_dir);
1321         return;
1322     }
1323     disk->has_dependencies = true;
1324     while ((dep = g_dir_read_name(dp_deps)) != NULL) {
1325         g_autofree char *dep_dir = NULL;
1326         char *dev_name;
1327 
1328         /* Add dependent disks */
1329         dep_dir = g_strdup_printf("%s/%s", deps_dir, dep);
1330         dev_name = get_device_for_syspath(dep_dir);
1331         if (dev_name != NULL) {
1332             g_debug("  adding dependent device: %s", dev_name);
1333             QAPI_LIST_PREPEND(disk->dependencies, dev_name);
1334         }
1335     }
1336     g_dir_close(dp_deps);
1337 }
1338 
1339 /*
1340  * Detect partitions subdirectory, name is "<disk_name><number>" or
1341  * "<disk_name>p<number>"
1342  *
1343  * @disk_name -- last component of /sys path (e.g. sda)
1344  * @disk_dir -- sys path of the disk (e.g. /sys/block/sda)
1345  * @disk_dev -- device node of the disk (e.g. /dev/sda)
1346  */
1347 static GuestDiskInfoList *get_disk_partitions(
1348     GuestDiskInfoList *list,
1349     const char *disk_name, const char *disk_dir,
1350     const char *disk_dev)
1351 {
1352     GuestDiskInfoList *ret = list;
1353     struct dirent *de_disk;
1354     DIR *dp_disk = NULL;
1355     size_t len = strlen(disk_name);
1356 
1357     dp_disk = opendir(disk_dir);
1358     while ((de_disk = readdir(dp_disk)) != NULL) {
1359         g_autofree char *partition_dir = NULL;
1360         char *dev_name;
1361         GuestDiskInfo *partition;
1362 
1363         if (!(de_disk->d_type & DT_DIR)) {
1364             continue;
1365         }
1366 
1367         if (!(strncmp(disk_name, de_disk->d_name, len) == 0 &&
1368             ((*(de_disk->d_name + len) == 'p' &&
1369             isdigit(*(de_disk->d_name + len + 1))) ||
1370                 isdigit(*(de_disk->d_name + len))))) {
1371             continue;
1372         }
1373 
1374         partition_dir = g_strdup_printf("%s/%s",
1375             disk_dir, de_disk->d_name);
1376         dev_name = get_device_for_syspath(partition_dir);
1377         if (dev_name == NULL) {
1378             g_debug("Failed to get device name for syspath: %s",
1379                 disk_dir);
1380             continue;
1381         }
1382         partition = g_new0(GuestDiskInfo, 1);
1383         partition->name = dev_name;
1384         partition->partition = true;
1385         partition->has_dependencies = true;
1386         /* Add parent disk as dependent for easier tracking of hierarchy */
1387         QAPI_LIST_PREPEND(partition->dependencies, g_strdup(disk_dev));
1388 
1389         QAPI_LIST_PREPEND(ret, partition);
1390     }
1391     closedir(dp_disk);
1392 
1393     return ret;
1394 }
1395 
1396 static void get_nvme_smart(GuestDiskInfo *disk)
1397 {
1398     int fd;
1399     GuestNVMeSmart *smart;
1400     NvmeSmartLog log = {0};
1401     struct nvme_admin_cmd cmd = {
1402         .opcode = NVME_ADM_CMD_GET_LOG_PAGE,
1403         .nsid = NVME_NSID_BROADCAST,
1404         .addr = (uintptr_t)&log,
1405         .data_len = sizeof(log),
1406         .cdw10 = NVME_LOG_SMART_INFO | (1 << 15) /* RAE bit */
1407                  | (((sizeof(log) >> 2) - 1) << 16)
1408     };
1409 
1410     fd = qemu_open_old(disk->name, O_RDONLY);
1411     if (fd == -1) {
1412         g_debug("Failed to open device: %s: %s", disk->name, g_strerror(errno));
1413         return;
1414     }
1415 
1416     if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd)) {
1417         g_debug("Failed to get smart: %s: %s", disk->name, g_strerror(errno));
1418         close(fd);
1419         return;
1420     }
1421 
1422     disk->has_smart = true;
1423     disk->smart = g_new0(GuestDiskSmart, 1);
1424     disk->smart->type = GUEST_DISK_BUS_TYPE_NVME;
1425 
1426     smart = &disk->smart->u.nvme;
1427     smart->critical_warning = log.critical_warning;
1428     smart->temperature = lduw_le_p(&log.temperature); /* unaligned field */
1429     smart->available_spare = log.available_spare;
1430     smart->available_spare_threshold = log.available_spare_threshold;
1431     smart->percentage_used = log.percentage_used;
1432     smart->data_units_read_lo = le64_to_cpu(log.data_units_read[0]);
1433     smart->data_units_read_hi = le64_to_cpu(log.data_units_read[1]);
1434     smart->data_units_written_lo = le64_to_cpu(log.data_units_written[0]);
1435     smart->data_units_written_hi = le64_to_cpu(log.data_units_written[1]);
1436     smart->host_read_commands_lo = le64_to_cpu(log.host_read_commands[0]);
1437     smart->host_read_commands_hi = le64_to_cpu(log.host_read_commands[1]);
1438     smart->host_write_commands_lo = le64_to_cpu(log.host_write_commands[0]);
1439     smart->host_write_commands_hi = le64_to_cpu(log.host_write_commands[1]);
1440     smart->controller_busy_time_lo = le64_to_cpu(log.controller_busy_time[0]);
1441     smart->controller_busy_time_hi = le64_to_cpu(log.controller_busy_time[1]);
1442     smart->power_cycles_lo = le64_to_cpu(log.power_cycles[0]);
1443     smart->power_cycles_hi = le64_to_cpu(log.power_cycles[1]);
1444     smart->power_on_hours_lo = le64_to_cpu(log.power_on_hours[0]);
1445     smart->power_on_hours_hi = le64_to_cpu(log.power_on_hours[1]);
1446     smart->unsafe_shutdowns_lo = le64_to_cpu(log.unsafe_shutdowns[0]);
1447     smart->unsafe_shutdowns_hi = le64_to_cpu(log.unsafe_shutdowns[1]);
1448     smart->media_errors_lo = le64_to_cpu(log.media_errors[0]);
1449     smart->media_errors_hi = le64_to_cpu(log.media_errors[1]);
1450     smart->number_of_error_log_entries_lo =
1451         le64_to_cpu(log.number_of_error_log_entries[0]);
1452     smart->number_of_error_log_entries_hi =
1453         le64_to_cpu(log.number_of_error_log_entries[1]);
1454 
1455     close(fd);
1456 }
1457 
1458 static void get_disk_smart(GuestDiskInfo *disk)
1459 {
1460     if (disk->has_address
1461         && (disk->address->bus_type == GUEST_DISK_BUS_TYPE_NVME)) {
1462         get_nvme_smart(disk);
1463     }
1464 }
1465 
1466 GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
1467 {
1468     GuestDiskInfoList *ret = NULL;
1469     GuestDiskInfo *disk;
1470     DIR *dp = NULL;
1471     struct dirent *de = NULL;
1472 
1473     g_debug("listing /sys/block directory");
1474     dp = opendir("/sys/block");
1475     if (dp == NULL) {
1476         error_setg_errno(errp, errno, "Can't open directory \"/sys/block\"");
1477         return NULL;
1478     }
1479     while ((de = readdir(dp)) != NULL) {
1480         g_autofree char *disk_dir = NULL, *line = NULL,
1481             *size_path = NULL;
1482         char *dev_name;
1483         Error *local_err = NULL;
1484         if (de->d_type != DT_LNK) {
1485             g_debug("  skipping entry: %s", de->d_name);
1486             continue;
1487         }
1488 
1489         /* Check size and skip zero-sized disks */
1490         g_debug("  checking disk size");
1491         size_path = g_strdup_printf("/sys/block/%s/size", de->d_name);
1492         if (!g_file_get_contents(size_path, &line, NULL, NULL)) {
1493             g_debug("  failed to read disk size");
1494             continue;
1495         }
1496         if (g_strcmp0(line, "0\n") == 0) {
1497             g_debug("  skipping zero-sized disk");
1498             continue;
1499         }
1500 
1501         g_debug("  adding %s", de->d_name);
1502         disk_dir = g_strdup_printf("/sys/block/%s", de->d_name);
1503         dev_name = get_device_for_syspath(disk_dir);
1504         if (dev_name == NULL) {
1505             g_debug("Failed to get device name for syspath: %s",
1506                 disk_dir);
1507             continue;
1508         }
1509         disk = g_new0(GuestDiskInfo, 1);
1510         disk->name = dev_name;
1511         disk->partition = false;
1512         disk->alias = get_alias_for_syspath(disk_dir);
1513         disk->has_alias = (disk->alias != NULL);
1514         QAPI_LIST_PREPEND(ret, disk);
1515 
1516         /* Get address for non-virtual devices */
1517         bool is_virtual = is_disk_virtual(disk_dir, &local_err);
1518         if (local_err != NULL) {
1519             g_debug("  failed to check disk path, ignoring error: %s",
1520                 error_get_pretty(local_err));
1521             error_free(local_err);
1522             local_err = NULL;
1523             /* Don't try to get the address */
1524             is_virtual = true;
1525         }
1526         if (!is_virtual) {
1527             disk->address = get_disk_address(disk_dir, &local_err);
1528             if (local_err != NULL) {
1529                 g_debug("  failed to get device info, ignoring error: %s",
1530                     error_get_pretty(local_err));
1531                 error_free(local_err);
1532                 local_err = NULL;
1533             } else if (disk->address != NULL) {
1534                 disk->has_address = true;
1535             }
1536         }
1537 
1538         get_disk_deps(disk_dir, disk);
1539         get_disk_smart(disk);
1540         ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name);
1541     }
1542 
1543     closedir(dp);
1544 
1545     return ret;
1546 }
1547 
1548 #else
1549 
1550 GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
1551 {
1552     error_setg(errp, QERR_UNSUPPORTED);
1553     return NULL;
1554 }
1555 
1556 #endif
1557 
1558 /* Return a list of the disk device(s)' info which @mount lies on */
1559 static GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount,
1560                                                Error **errp)
1561 {
1562     GuestFilesystemInfo *fs = g_malloc0(sizeof(*fs));
1563     struct statvfs buf;
1564     unsigned long used, nonroot_total, fr_size;
1565     char *devpath = g_strdup_printf("/sys/dev/block/%u:%u",
1566                                     mount->devmajor, mount->devminor);
1567 
1568     fs->mountpoint = g_strdup(mount->dirname);
1569     fs->type = g_strdup(mount->devtype);
1570     build_guest_fsinfo_for_device(devpath, fs, errp);
1571 
1572     if (statvfs(fs->mountpoint, &buf) == 0) {
1573         fr_size = buf.f_frsize;
1574         used = buf.f_blocks - buf.f_bfree;
1575         nonroot_total = used + buf.f_bavail;
1576         fs->used_bytes = used * fr_size;
1577         fs->total_bytes = nonroot_total * fr_size;
1578 
1579         fs->has_total_bytes = true;
1580         fs->has_used_bytes = true;
1581     }
1582 
1583     g_free(devpath);
1584 
1585     return fs;
1586 }
1587 
1588 GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
1589 {
1590     FsMountList mounts;
1591     struct FsMount *mount;
1592     GuestFilesystemInfoList *ret = NULL;
1593     Error *local_err = NULL;
1594 
1595     QTAILQ_INIT(&mounts);
1596     build_fs_mount_list(&mounts, &local_err);
1597     if (local_err) {
1598         error_propagate(errp, local_err);
1599         return NULL;
1600     }
1601 
1602     QTAILQ_FOREACH(mount, &mounts, next) {
1603         g_debug("Building guest fsinfo for '%s'", mount->dirname);
1604 
1605         QAPI_LIST_PREPEND(ret, build_guest_fsinfo(mount, &local_err));
1606         if (local_err) {
1607             error_propagate(errp, local_err);
1608             qapi_free_GuestFilesystemInfoList(ret);
1609             ret = NULL;
1610             break;
1611         }
1612     }
1613 
1614     free_fs_mount_list(&mounts);
1615     return ret;
1616 }
1617 
1618 
/* Which phase the fsfreeze hook script is being invoked for. */
typedef enum {
    FSFREEZE_HOOK_THAW = 0,
    FSFREEZE_HOOK_FREEZE,
} FsfreezeHookArg;

/* Command-line argument passed to the hook, indexed by FsfreezeHookArg. */
static const char *fsfreeze_hook_arg_string[] = {
    [FSFREEZE_HOOK_THAW]   = "thaw",
    [FSFREEZE_HOOK_FREEZE] = "freeze",
};
1628 
1629 static void execute_fsfreeze_hook(FsfreezeHookArg arg, Error **errp)
1630 {
1631     int status;
1632     pid_t pid;
1633     const char *hook;
1634     const char *arg_str = fsfreeze_hook_arg_string[arg];
1635     Error *local_err = NULL;
1636 
1637     hook = ga_fsfreeze_hook(ga_state);
1638     if (!hook) {
1639         return;
1640     }
1641     if (access(hook, X_OK) != 0) {
1642         error_setg_errno(errp, errno, "can't access fsfreeze hook '%s'", hook);
1643         return;
1644     }
1645 
1646     slog("executing fsfreeze hook with arg '%s'", arg_str);
1647     pid = fork();
1648     if (pid == 0) {
1649         setsid();
1650         reopen_fd_to_null(0);
1651         reopen_fd_to_null(1);
1652         reopen_fd_to_null(2);
1653 
1654         execl(hook, hook, arg_str, NULL);
1655         _exit(EXIT_FAILURE);
1656     } else if (pid < 0) {
1657         error_setg_errno(errp, errno, "failed to create child process");
1658         return;
1659     }
1660 
1661     ga_wait_child(pid, &status, &local_err);
1662     if (local_err) {
1663         error_propagate(errp, local_err);
1664         return;
1665     }
1666 
1667     if (!WIFEXITED(status)) {
1668         error_setg(errp, "fsfreeze hook has terminated abnormally");
1669         return;
1670     }
1671 
1672     status = WEXITSTATUS(status);
1673     if (status) {
1674         error_setg(errp, "fsfreeze hook has failed with status %d", status);
1675         return;
1676     }
1677 }
1678 
1679 /*
1680  * Return status of freeze/thaw
1681  */
1682 GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp)
1683 {
1684     if (ga_is_frozen(ga_state)) {
1685         return GUEST_FSFREEZE_STATUS_FROZEN;
1686     }
1687 
1688     return GUEST_FSFREEZE_STATUS_THAWED;
1689 }
1690 
1691 int64_t qmp_guest_fsfreeze_freeze(Error **errp)
1692 {
1693     return qmp_guest_fsfreeze_freeze_list(false, NULL, errp);
1694 }
1695 
1696 /*
1697  * Walk list of mounted file systems in the guest, and freeze the ones which
1698  * are real local file systems.
1699  */
1700 int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints,
1701                                        strList *mountpoints,
1702                                        Error **errp)
1703 {
1704     int ret = 0, i = 0;
1705     strList *list;
1706     FsMountList mounts;
1707     struct FsMount *mount;
1708     Error *local_err = NULL;
1709     int fd;
1710 
1711     slog("guest-fsfreeze called");
1712 
1713     execute_fsfreeze_hook(FSFREEZE_HOOK_FREEZE, &local_err);
1714     if (local_err) {
1715         error_propagate(errp, local_err);
1716         return -1;
1717     }
1718 
1719     QTAILQ_INIT(&mounts);
1720     build_fs_mount_list(&mounts, &local_err);
1721     if (local_err) {
1722         error_propagate(errp, local_err);
1723         return -1;
1724     }
1725 
1726     /* cannot risk guest agent blocking itself on a write in this state */
1727     ga_set_frozen(ga_state);
1728 
1729     QTAILQ_FOREACH_REVERSE(mount, &mounts, next) {
1730         /* To issue fsfreeze in the reverse order of mounts, check if the
1731          * mount is listed in the list here */
1732         if (has_mountpoints) {
1733             for (list = mountpoints; list; list = list->next) {
1734                 if (strcmp(list->value, mount->dirname) == 0) {
1735                     break;
1736                 }
1737             }
1738             if (!list) {
1739                 continue;
1740             }
1741         }
1742 
1743         fd = qemu_open_old(mount->dirname, O_RDONLY);
1744         if (fd == -1) {
1745             error_setg_errno(errp, errno, "failed to open %s", mount->dirname);
1746             goto error;
1747         }
1748 
1749         /* we try to cull filesystems we know won't work in advance, but other
1750          * filesystems may not implement fsfreeze for less obvious reasons.
1751          * these will report EOPNOTSUPP. we simply ignore these when tallying
1752          * the number of frozen filesystems.
1753          * if a filesystem is mounted more than once (aka bind mount) a
1754          * consecutive attempt to freeze an already frozen filesystem will
1755          * return EBUSY.
1756          *
1757          * any other error means a failure to freeze a filesystem we
1758          * expect to be freezable, so return an error in those cases
1759          * and return system to thawed state.
1760          */
1761         ret = ioctl(fd, FIFREEZE);
1762         if (ret == -1) {
1763             if (errno != EOPNOTSUPP && errno != EBUSY) {
1764                 error_setg_errno(errp, errno, "failed to freeze %s",
1765                                  mount->dirname);
1766                 close(fd);
1767                 goto error;
1768             }
1769         } else {
1770             i++;
1771         }
1772         close(fd);
1773     }
1774 
1775     free_fs_mount_list(&mounts);
1776     /* We may not issue any FIFREEZE here.
1777      * Just unset ga_state here and ready for the next call.
1778      */
1779     if (i == 0) {
1780         ga_unset_frozen(ga_state);
1781     }
1782     return i;
1783 
1784 error:
1785     free_fs_mount_list(&mounts);
1786     qmp_guest_fsfreeze_thaw(NULL);
1787     return 0;
1788 }
1789 
1790 /*
1791  * Walk list of frozen file systems in the guest, and thaw them.
1792  */
1793 int64_t qmp_guest_fsfreeze_thaw(Error **errp)
1794 {
1795     int ret;
1796     FsMountList mounts;
1797     FsMount *mount;
1798     int fd, i = 0, logged;
1799     Error *local_err = NULL;
1800 
1801     QTAILQ_INIT(&mounts);
1802     build_fs_mount_list(&mounts, &local_err);
1803     if (local_err) {
1804         error_propagate(errp, local_err);
1805         return 0;
1806     }
1807 
1808     QTAILQ_FOREACH(mount, &mounts, next) {
1809         logged = false;
1810         fd = qemu_open_old(mount->dirname, O_RDONLY);
1811         if (fd == -1) {
1812             continue;
1813         }
1814         /* we have no way of knowing whether a filesystem was actually unfrozen
1815          * as a result of a successful call to FITHAW, only that if an error
1816          * was returned the filesystem was *not* unfrozen by that particular
1817          * call.
1818          *
1819          * since multiple preceding FIFREEZEs require multiple calls to FITHAW
1820          * to unfreeze, continuing issuing FITHAW until an error is returned,
1821          * in which case either the filesystem is in an unfreezable state, or,
1822          * more likely, it was thawed previously (and remains so afterward).
1823          *
1824          * also, since the most recent successful call is the one that did
1825          * the actual unfreeze, we can use this to provide an accurate count
1826          * of the number of filesystems unfrozen by guest-fsfreeze-thaw, which
1827          * may * be useful for determining whether a filesystem was unfrozen
1828          * during the freeze/thaw phase by a process other than qemu-ga.
1829          */
1830         do {
1831             ret = ioctl(fd, FITHAW);
1832             if (ret == 0 && !logged) {
1833                 i++;
1834                 logged = true;
1835             }
1836         } while (ret == 0);
1837         close(fd);
1838     }
1839 
1840     ga_unset_frozen(ga_state);
1841     free_fs_mount_list(&mounts);
1842 
1843     execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, errp);
1844 
1845     return i;
1846 }
1847 
1848 static void guest_fsfreeze_cleanup(void)
1849 {
1850     Error *err = NULL;
1851 
1852     if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) {
1853         qmp_guest_fsfreeze_thaw(&err);
1854         if (err) {
1855             slog("failed to clean up frozen filesystems: %s",
1856                  error_get_pretty(err));
1857             error_free(err);
1858         }
1859     }
1860 }
1861 #endif /* CONFIG_FSFREEZE */
1862 
1863 #if defined(CONFIG_FSTRIM)
1864 /*
1865  * Walk list of mounted file systems in the guest, and trim them.
1866  */
1867 GuestFilesystemTrimResponse *
1868 qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
1869 {
1870     GuestFilesystemTrimResponse *response;
1871     GuestFilesystemTrimResult *result;
1872     int ret = 0;
1873     FsMountList mounts;
1874     struct FsMount *mount;
1875     int fd;
1876     Error *local_err = NULL;
1877     struct fstrim_range r;
1878 
1879     slog("guest-fstrim called");
1880 
1881     QTAILQ_INIT(&mounts);
1882     build_fs_mount_list(&mounts, &local_err);
1883     if (local_err) {
1884         error_propagate(errp, local_err);
1885         return NULL;
1886     }
1887 
1888     response = g_malloc0(sizeof(*response));
1889 
1890     QTAILQ_FOREACH(mount, &mounts, next) {
1891         result = g_malloc0(sizeof(*result));
1892         result->path = g_strdup(mount->dirname);
1893 
1894         QAPI_LIST_PREPEND(response->paths, result);
1895 
1896         fd = qemu_open_old(mount->dirname, O_RDONLY);
1897         if (fd == -1) {
1898             result->error = g_strdup_printf("failed to open: %s",
1899                                             strerror(errno));
1900             result->has_error = true;
1901             continue;
1902         }
1903 
1904         /* We try to cull filesystems we know won't work in advance, but other
1905          * filesystems may not implement fstrim for less obvious reasons.
1906          * These will report EOPNOTSUPP; while in some other cases ENOTTY
1907          * will be reported (e.g. CD-ROMs).
1908          * Any other error means an unexpected error.
1909          */
1910         r.start = 0;
1911         r.len = -1;
1912         r.minlen = has_minimum ? minimum : 0;
1913         ret = ioctl(fd, FITRIM, &r);
1914         if (ret == -1) {
1915             result->has_error = true;
1916             if (errno == ENOTTY || errno == EOPNOTSUPP) {
1917                 result->error = g_strdup("trim not supported");
1918             } else {
1919                 result->error = g_strdup_printf("failed to trim: %s",
1920                                                 strerror(errno));
1921             }
1922             close(fd);
1923             continue;
1924         }
1925 
1926         result->has_minimum = true;
1927         result->minimum = r.minlen;
1928         result->has_trimmed = true;
1929         result->trimmed = r.len;
1930         close(fd);
1931     }
1932 
1933     free_fs_mount_list(&mounts);
1934     return response;
1935 }
1936 #endif /* CONFIG_FSTRIM */
1937 
1938 
/* sysfs file read to probe, and written to trigger, a suspend. */
#define LINUX_SYS_STATE_FILE "/sys/power/state"
/* SUSPEND_SUPPORTED is compared against pm-is-supported's exit status. */
#define SUSPEND_SUPPORTED 0
#define SUSPEND_NOT_SUPPORTED 1

/*
 * Suspend flavours.  The numeric values (0, 1, 2) are used as indexes
 * into the three-element per-backend string tables below, so the order
 * of the enumerators must not change.
 */
typedef enum {
    SUSPEND_MODE_DISK,
    SUSPEND_MODE_RAM,
    SUSPEND_MODE_HYBRID,
} SuspendMode;
1948 
1949 /*
1950  * Executes a command in a child process using g_spawn_sync,
1951  * returning an int >= 0 representing the exit status of the
1952  * process.
1953  *
1954  * If the program wasn't found in path, returns -1.
1955  *
1956  * If a problem happened when creating the child process,
1957  * returns -1 and errp is set.
1958  */
1959 static int run_process_child(const char *command[], Error **errp)
1960 {
1961     int exit_status, spawn_flag;
1962     GError *g_err = NULL;
1963     bool success;
1964 
1965     spawn_flag = G_SPAWN_SEARCH_PATH | G_SPAWN_STDOUT_TO_DEV_NULL |
1966                  G_SPAWN_STDERR_TO_DEV_NULL;
1967 
1968     success =  g_spawn_sync(NULL, (char **)command, NULL, spawn_flag,
1969                             NULL, NULL, NULL, NULL,
1970                             &exit_status, &g_err);
1971 
1972     if (success) {
1973         return WEXITSTATUS(exit_status);
1974     }
1975 
1976     if (g_err && (g_err->code != G_SPAWN_ERROR_NOENT)) {
1977         error_setg(errp, "failed to create child process, error '%s'",
1978                    g_err->message);
1979     }
1980 
1981     g_error_free(g_err);
1982     return -1;
1983 }
1984 
1985 static bool systemd_supports_mode(SuspendMode mode, Error **errp)
1986 {
1987     const char *systemctl_args[3] = {"systemd-hibernate", "systemd-suspend",
1988                                      "systemd-hybrid-sleep"};
1989     const char *cmd[4] = {"systemctl", "status", systemctl_args[mode], NULL};
1990     int status;
1991 
1992     status = run_process_child(cmd, errp);
1993 
1994     /*
1995      * systemctl status uses LSB return codes so we can expect
1996      * status > 0 and be ok. To assert if the guest has support
1997      * for the selected suspend mode, status should be < 4. 4 is
1998      * the code for unknown service status, the return value when
1999      * the service does not exist. A common value is status = 3
2000      * (program is not running).
2001      */
2002     if (status > 0 && status < 4) {
2003         return true;
2004     }
2005 
2006     return false;
2007 }
2008 
2009 static void systemd_suspend(SuspendMode mode, Error **errp)
2010 {
2011     Error *local_err = NULL;
2012     const char *systemctl_args[3] = {"hibernate", "suspend", "hybrid-sleep"};
2013     const char *cmd[3] = {"systemctl", systemctl_args[mode], NULL};
2014     int status;
2015 
2016     status = run_process_child(cmd, &local_err);
2017 
2018     if (status == 0) {
2019         return;
2020     }
2021 
2022     if ((status == -1) && !local_err) {
2023         error_setg(errp, "the helper program 'systemctl %s' was not found",
2024                    systemctl_args[mode]);
2025         return;
2026     }
2027 
2028     if (local_err) {
2029         error_propagate(errp, local_err);
2030     } else {
2031         error_setg(errp, "the helper program 'systemctl %s' returned an "
2032                    "unexpected exit status code (%d)",
2033                    systemctl_args[mode], status);
2034     }
2035 }
2036 
2037 static bool pmutils_supports_mode(SuspendMode mode, Error **errp)
2038 {
2039     Error *local_err = NULL;
2040     const char *pmutils_args[3] = {"--hibernate", "--suspend",
2041                                    "--suspend-hybrid"};
2042     const char *cmd[3] = {"pm-is-supported", pmutils_args[mode], NULL};
2043     int status;
2044 
2045     status = run_process_child(cmd, &local_err);
2046 
2047     if (status == SUSPEND_SUPPORTED) {
2048         return true;
2049     }
2050 
2051     if ((status == -1) && !local_err) {
2052         return false;
2053     }
2054 
2055     if (local_err) {
2056         error_propagate(errp, local_err);
2057     } else {
2058         error_setg(errp,
2059                    "the helper program '%s' returned an unexpected exit"
2060                    " status code (%d)", "pm-is-supported", status);
2061     }
2062 
2063     return false;
2064 }
2065 
2066 static void pmutils_suspend(SuspendMode mode, Error **errp)
2067 {
2068     Error *local_err = NULL;
2069     const char *pmutils_binaries[3] = {"pm-hibernate", "pm-suspend",
2070                                        "pm-suspend-hybrid"};
2071     const char *cmd[2] = {pmutils_binaries[mode], NULL};
2072     int status;
2073 
2074     status = run_process_child(cmd, &local_err);
2075 
2076     if (status == 0) {
2077         return;
2078     }
2079 
2080     if ((status == -1) && !local_err) {
2081         error_setg(errp, "the helper program '%s' was not found",
2082                    pmutils_binaries[mode]);
2083         return;
2084     }
2085 
2086     if (local_err) {
2087         error_propagate(errp, local_err);
2088     } else {
2089         error_setg(errp,
2090                    "the helper program '%s' returned an unexpected exit"
2091                    " status code (%d)", pmutils_binaries[mode], status);
2092     }
2093 }
2094 
2095 static bool linux_sys_state_supports_mode(SuspendMode mode, Error **errp)
2096 {
2097     const char *sysfile_strs[3] = {"disk", "mem", NULL};
2098     const char *sysfile_str = sysfile_strs[mode];
2099     char buf[32]; /* hopefully big enough */
2100     int fd;
2101     ssize_t ret;
2102 
2103     if (!sysfile_str) {
2104         error_setg(errp, "unknown guest suspend mode");
2105         return false;
2106     }
2107 
2108     fd = open(LINUX_SYS_STATE_FILE, O_RDONLY);
2109     if (fd < 0) {
2110         return false;
2111     }
2112 
2113     ret = read(fd, buf, sizeof(buf) - 1);
2114     close(fd);
2115     if (ret <= 0) {
2116         return false;
2117     }
2118     buf[ret] = '\0';
2119 
2120     if (strstr(buf, sysfile_str)) {
2121         return true;
2122     }
2123     return false;
2124 }
2125 
2126 static void linux_sys_state_suspend(SuspendMode mode, Error **errp)
2127 {
2128     Error *local_err = NULL;
2129     const char *sysfile_strs[3] = {"disk", "mem", NULL};
2130     const char *sysfile_str = sysfile_strs[mode];
2131     pid_t pid;
2132     int status;
2133 
2134     if (!sysfile_str) {
2135         error_setg(errp, "unknown guest suspend mode");
2136         return;
2137     }
2138 
2139     pid = fork();
2140     if (!pid) {
2141         /* child */
2142         int fd;
2143 
2144         setsid();
2145         reopen_fd_to_null(0);
2146         reopen_fd_to_null(1);
2147         reopen_fd_to_null(2);
2148 
2149         fd = open(LINUX_SYS_STATE_FILE, O_WRONLY);
2150         if (fd < 0) {
2151             _exit(EXIT_FAILURE);
2152         }
2153 
2154         if (write(fd, sysfile_str, strlen(sysfile_str)) < 0) {
2155             _exit(EXIT_FAILURE);
2156         }
2157 
2158         _exit(EXIT_SUCCESS);
2159     } else if (pid < 0) {
2160         error_setg_errno(errp, errno, "failed to create child process");
2161         return;
2162     }
2163 
2164     ga_wait_child(pid, &status, &local_err);
2165     if (local_err) {
2166         error_propagate(errp, local_err);
2167         return;
2168     }
2169 
2170     if (WEXITSTATUS(status)) {
2171         error_setg(errp, "child process has failed to suspend");
2172     }
2173 
2174 }
2175 
2176 static void guest_suspend(SuspendMode mode, Error **errp)
2177 {
2178     Error *local_err = NULL;
2179     bool mode_supported = false;
2180 
2181     if (systemd_supports_mode(mode, &local_err)) {
2182         mode_supported = true;
2183         systemd_suspend(mode, &local_err);
2184     }
2185 
2186     if (!local_err) {
2187         return;
2188     }
2189 
2190     error_free(local_err);
2191     local_err = NULL;
2192 
2193     if (pmutils_supports_mode(mode, &local_err)) {
2194         mode_supported = true;
2195         pmutils_suspend(mode, &local_err);
2196     }
2197 
2198     if (!local_err) {
2199         return;
2200     }
2201 
2202     error_free(local_err);
2203     local_err = NULL;
2204 
2205     if (linux_sys_state_supports_mode(mode, &local_err)) {
2206         mode_supported = true;
2207         linux_sys_state_suspend(mode, &local_err);
2208     }
2209 
2210     if (!mode_supported) {
2211         error_free(local_err);
2212         error_setg(errp,
2213                    "the requested suspend mode is not supported by the guest");
2214     } else {
2215         error_propagate(errp, local_err);
2216     }
2217 }
2218 
2219 void qmp_guest_suspend_disk(Error **errp)
2220 {
2221     guest_suspend(SUSPEND_MODE_DISK, errp);
2222 }
2223 
2224 void qmp_guest_suspend_ram(Error **errp)
2225 {
2226     guest_suspend(SUSPEND_MODE_RAM, errp);
2227 }
2228 
2229 void qmp_guest_suspend_hybrid(Error **errp)
2230 {
2231     guest_suspend(SUSPEND_MODE_HYBRID, errp);
2232 }
2233 
2234 /* Transfer online/offline status between @vcpu and the guest system.
2235  *
2236  * On input either @errp or *@errp must be NULL.
2237  *
2238  * In system-to-@vcpu direction, the following @vcpu fields are accessed:
2239  * - R: vcpu->logical_id
2240  * - W: vcpu->online
2241  * - W: vcpu->can_offline
2242  *
2243  * In @vcpu-to-system direction, the following @vcpu fields are accessed:
2244  * - R: vcpu->logical_id
2245  * - R: vcpu->online
2246  *
2247  * Written members remain unmodified on error.
2248  */
2249 static void transfer_vcpu(GuestLogicalProcessor *vcpu, bool sys2vcpu,
2250                           char *dirpath, Error **errp)
2251 {
2252     int fd;
2253     int res;
2254     int dirfd;
2255     static const char fn[] = "online";
2256 
2257     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
2258     if (dirfd == -1) {
2259         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
2260         return;
2261     }
2262 
2263     fd = openat(dirfd, fn, sys2vcpu ? O_RDONLY : O_RDWR);
2264     if (fd == -1) {
2265         if (errno != ENOENT) {
2266             error_setg_errno(errp, errno, "open(\"%s/%s\")", dirpath, fn);
2267         } else if (sys2vcpu) {
2268             vcpu->online = true;
2269             vcpu->can_offline = false;
2270         } else if (!vcpu->online) {
2271             error_setg(errp, "logical processor #%" PRId64 " can't be "
2272                        "offlined", vcpu->logical_id);
2273         } /* otherwise pretend successful re-onlining */
2274     } else {
2275         unsigned char status;
2276 
2277         res = pread(fd, &status, 1, 0);
2278         if (res == -1) {
2279             error_setg_errno(errp, errno, "pread(\"%s/%s\")", dirpath, fn);
2280         } else if (res == 0) {
2281             error_setg(errp, "pread(\"%s/%s\"): unexpected EOF", dirpath,
2282                        fn);
2283         } else if (sys2vcpu) {
2284             vcpu->online = (status != '0');
2285             vcpu->can_offline = true;
2286         } else if (vcpu->online != (status != '0')) {
2287             status = '0' + vcpu->online;
2288             if (pwrite(fd, &status, 1, 0) == -1) {
2289                 error_setg_errno(errp, errno, "pwrite(\"%s/%s\")", dirpath,
2290                                  fn);
2291             }
2292         } /* otherwise pretend successful re-(on|off)-lining */
2293 
2294         res = close(fd);
2295         g_assert(res == 0);
2296     }
2297 
2298     res = close(dirfd);
2299     g_assert(res == 0);
2300 }
2301 
2302 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
2303 {
2304     GuestLogicalProcessorList *head, **tail;
2305     const char *cpu_dir = "/sys/devices/system/cpu";
2306     const gchar *line;
2307     g_autoptr(GDir) cpu_gdir = NULL;
2308     Error *local_err = NULL;
2309 
2310     head = NULL;
2311     tail = &head;
2312     cpu_gdir = g_dir_open(cpu_dir, 0, NULL);
2313 
2314     if (cpu_gdir == NULL) {
2315         error_setg_errno(errp, errno, "failed to list entries: %s", cpu_dir);
2316         return NULL;
2317     }
2318 
2319     while (local_err == NULL && (line = g_dir_read_name(cpu_gdir)) != NULL) {
2320         GuestLogicalProcessor *vcpu;
2321         int64_t id;
2322         if (sscanf(line, "cpu%" PRId64, &id)) {
2323             g_autofree char *path = g_strdup_printf("/sys/devices/system/cpu/"
2324                                                     "cpu%" PRId64 "/", id);
2325             vcpu = g_malloc0(sizeof *vcpu);
2326             vcpu->logical_id = id;
2327             vcpu->has_can_offline = true; /* lolspeak ftw */
2328             transfer_vcpu(vcpu, true, path, &local_err);
2329             QAPI_LIST_APPEND(tail, vcpu);
2330         }
2331     }
2332 
2333     if (local_err == NULL) {
2334         /* there's no guest with zero VCPUs */
2335         g_assert(head != NULL);
2336         return head;
2337     }
2338 
2339     qapi_free_GuestLogicalProcessorList(head);
2340     error_propagate(errp, local_err);
2341     return NULL;
2342 }
2343 
2344 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
2345 {
2346     int64_t processed;
2347     Error *local_err = NULL;
2348 
2349     processed = 0;
2350     while (vcpus != NULL) {
2351         char *path = g_strdup_printf("/sys/devices/system/cpu/cpu%" PRId64 "/",
2352                                      vcpus->value->logical_id);
2353 
2354         transfer_vcpu(vcpus->value, false, path, &local_err);
2355         g_free(path);
2356         if (local_err != NULL) {
2357             break;
2358         }
2359         ++processed;
2360         vcpus = vcpus->next;
2361     }
2362 
2363     if (local_err != NULL) {
2364         if (processed == 0) {
2365             error_propagate(errp, local_err);
2366         } else {
2367             error_free(local_err);
2368         }
2369     }
2370 
2371     return processed;
2372 }
2373 
2374 void qmp_guest_set_user_password(const char *username,
2375                                  const char *password,
2376                                  bool crypted,
2377                                  Error **errp)
2378 {
2379     Error *local_err = NULL;
2380     char *passwd_path = NULL;
2381     pid_t pid;
2382     int status;
2383     int datafd[2] = { -1, -1 };
2384     char *rawpasswddata = NULL;
2385     size_t rawpasswdlen;
2386     char *chpasswddata = NULL;
2387     size_t chpasswdlen;
2388 
2389     rawpasswddata = (char *)qbase64_decode(password, -1, &rawpasswdlen, errp);
2390     if (!rawpasswddata) {
2391         return;
2392     }
2393     rawpasswddata = g_renew(char, rawpasswddata, rawpasswdlen + 1);
2394     rawpasswddata[rawpasswdlen] = '\0';
2395 
2396     if (strchr(rawpasswddata, '\n')) {
2397         error_setg(errp, "forbidden characters in raw password");
2398         goto out;
2399     }
2400 
2401     if (strchr(username, '\n') ||
2402         strchr(username, ':')) {
2403         error_setg(errp, "forbidden characters in username");
2404         goto out;
2405     }
2406 
2407     chpasswddata = g_strdup_printf("%s:%s\n", username, rawpasswddata);
2408     chpasswdlen = strlen(chpasswddata);
2409 
2410     passwd_path = g_find_program_in_path("chpasswd");
2411 
2412     if (!passwd_path) {
2413         error_setg(errp, "cannot find 'passwd' program in PATH");
2414         goto out;
2415     }
2416 
2417     if (!g_unix_open_pipe(datafd, FD_CLOEXEC, NULL)) {
2418         error_setg(errp, "cannot create pipe FDs");
2419         goto out;
2420     }
2421 
2422     pid = fork();
2423     if (pid == 0) {
2424         close(datafd[1]);
2425         /* child */
2426         setsid();
2427         dup2(datafd[0], 0);
2428         reopen_fd_to_null(1);
2429         reopen_fd_to_null(2);
2430 
2431         if (crypted) {
2432             execl(passwd_path, "chpasswd", "-e", NULL);
2433         } else {
2434             execl(passwd_path, "chpasswd", NULL);
2435         }
2436         _exit(EXIT_FAILURE);
2437     } else if (pid < 0) {
2438         error_setg_errno(errp, errno, "failed to create child process");
2439         goto out;
2440     }
2441     close(datafd[0]);
2442     datafd[0] = -1;
2443 
2444     if (qemu_write_full(datafd[1], chpasswddata, chpasswdlen) != chpasswdlen) {
2445         error_setg_errno(errp, errno, "cannot write new account password");
2446         goto out;
2447     }
2448     close(datafd[1]);
2449     datafd[1] = -1;
2450 
2451     ga_wait_child(pid, &status, &local_err);
2452     if (local_err) {
2453         error_propagate(errp, local_err);
2454         goto out;
2455     }
2456 
2457     if (!WIFEXITED(status)) {
2458         error_setg(errp, "child process has terminated abnormally");
2459         goto out;
2460     }
2461 
2462     if (WEXITSTATUS(status)) {
2463         error_setg(errp, "child process has failed to set user password");
2464         goto out;
2465     }
2466 
2467 out:
2468     g_free(chpasswddata);
2469     g_free(rawpasswddata);
2470     g_free(passwd_path);
2471     if (datafd[0] != -1) {
2472         close(datafd[0]);
2473     }
2474     if (datafd[1] != -1) {
2475         close(datafd[1]);
2476     }
2477 }
2478 
2479 static void ga_read_sysfs_file(int dirfd, const char *pathname, char *buf,
2480                                int size, Error **errp)
2481 {
2482     int fd;
2483     int res;
2484 
2485     errno = 0;
2486     fd = openat(dirfd, pathname, O_RDONLY);
2487     if (fd == -1) {
2488         error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
2489         return;
2490     }
2491 
2492     res = pread(fd, buf, size, 0);
2493     if (res == -1) {
2494         error_setg_errno(errp, errno, "pread sysfs file \"%s\"", pathname);
2495     } else if (res == 0) {
2496         error_setg(errp, "pread sysfs file \"%s\": unexpected EOF", pathname);
2497     }
2498     close(fd);
2499 }
2500 
2501 static void ga_write_sysfs_file(int dirfd, const char *pathname,
2502                                 const char *buf, int size, Error **errp)
2503 {
2504     int fd;
2505 
2506     errno = 0;
2507     fd = openat(dirfd, pathname, O_WRONLY);
2508     if (fd == -1) {
2509         error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
2510         return;
2511     }
2512 
2513     if (pwrite(fd, buf, size, 0) == -1) {
2514         error_setg_errno(errp, errno, "pwrite sysfs file \"%s\"", pathname);
2515     }
2516 
2517     close(fd);
2518 }
2519 
2520 /* Transfer online/offline status between @mem_blk and the guest system.
2521  *
2522  * On input either @errp or *@errp must be NULL.
2523  *
2524  * In system-to-@mem_blk direction, the following @mem_blk fields are accessed:
2525  * - R: mem_blk->phys_index
2526  * - W: mem_blk->online
2527  * - W: mem_blk->can_offline
2528  *
2529  * In @mem_blk-to-system direction, the following @mem_blk fields are accessed:
2530  * - R: mem_blk->phys_index
2531  * - R: mem_blk->online
2532  *-  R: mem_blk->can_offline
2533  * Written members remain unmodified on error.
2534  */
2535 static void transfer_memory_block(GuestMemoryBlock *mem_blk, bool sys2memblk,
2536                                   GuestMemoryBlockResponse *result,
2537                                   Error **errp)
2538 {
2539     char *dirpath;
2540     int dirfd;
2541     char *status;
2542     Error *local_err = NULL;
2543 
2544     if (!sys2memblk) {
2545         DIR *dp;
2546 
2547         if (!result) {
2548             error_setg(errp, "Internal error, 'result' should not be NULL");
2549             return;
2550         }
2551         errno = 0;
2552         dp = opendir("/sys/devices/system/memory/");
2553          /* if there is no 'memory' directory in sysfs,
2554          * we think this VM does not support online/offline memory block,
2555          * any other solution?
2556          */
2557         if (!dp) {
2558             if (errno == ENOENT) {
2559                 result->response =
2560                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
2561             }
2562             goto out1;
2563         }
2564         closedir(dp);
2565     }
2566 
2567     dirpath = g_strdup_printf("/sys/devices/system/memory/memory%" PRId64 "/",
2568                               mem_blk->phys_index);
2569     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
2570     if (dirfd == -1) {
2571         if (sys2memblk) {
2572             error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
2573         } else {
2574             if (errno == ENOENT) {
2575                 result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_NOT_FOUND;
2576             } else {
2577                 result->response =
2578                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
2579             }
2580         }
2581         g_free(dirpath);
2582         goto out1;
2583     }
2584     g_free(dirpath);
2585 
2586     status = g_malloc0(10);
2587     ga_read_sysfs_file(dirfd, "state", status, 10, &local_err);
2588     if (local_err) {
2589         /* treat with sysfs file that not exist in old kernel */
2590         if (errno == ENOENT) {
2591             error_free(local_err);
2592             if (sys2memblk) {
2593                 mem_blk->online = true;
2594                 mem_blk->can_offline = false;
2595             } else if (!mem_blk->online) {
2596                 result->response =
2597                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
2598             }
2599         } else {
2600             if (sys2memblk) {
2601                 error_propagate(errp, local_err);
2602             } else {
2603                 error_free(local_err);
2604                 result->response =
2605                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
2606             }
2607         }
2608         goto out2;
2609     }
2610 
2611     if (sys2memblk) {
2612         char removable = '0';
2613 
2614         mem_blk->online = (strncmp(status, "online", 6) == 0);
2615 
2616         ga_read_sysfs_file(dirfd, "removable", &removable, 1, &local_err);
2617         if (local_err) {
2618             /* if no 'removable' file, it doesn't support offline mem blk */
2619             if (errno == ENOENT) {
2620                 error_free(local_err);
2621                 mem_blk->can_offline = false;
2622             } else {
2623                 error_propagate(errp, local_err);
2624             }
2625         } else {
2626             mem_blk->can_offline = (removable != '0');
2627         }
2628     } else {
2629         if (mem_blk->online != (strncmp(status, "online", 6) == 0)) {
2630             const char *new_state = mem_blk->online ? "online" : "offline";
2631 
2632             ga_write_sysfs_file(dirfd, "state", new_state, strlen(new_state),
2633                                 &local_err);
2634             if (local_err) {
2635                 error_free(local_err);
2636                 result->response =
2637                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
2638                 goto out2;
2639             }
2640 
2641             result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_SUCCESS;
2642             result->has_error_code = false;
2643         } /* otherwise pretend successful re-(on|off)-lining */
2644     }
2645     g_free(status);
2646     close(dirfd);
2647     return;
2648 
2649 out2:
2650     g_free(status);
2651     close(dirfd);
2652 out1:
2653     if (!sys2memblk) {
2654         result->has_error_code = true;
2655         result->error_code = errno;
2656     }
2657 }
2658 
2659 GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
2660 {
2661     GuestMemoryBlockList *head, **tail;
2662     Error *local_err = NULL;
2663     struct dirent *de;
2664     DIR *dp;
2665 
2666     head = NULL;
2667     tail = &head;
2668 
2669     dp = opendir("/sys/devices/system/memory/");
2670     if (!dp) {
2671         /* it's ok if this happens to be a system that doesn't expose
2672          * memory blocks via sysfs, but otherwise we should report
2673          * an error
2674          */
2675         if (errno != ENOENT) {
2676             error_setg_errno(errp, errno, "Can't open directory"
2677                              "\"/sys/devices/system/memory/\"");
2678         }
2679         return NULL;
2680     }
2681 
2682     /* Note: the phys_index of memory block may be discontinuous,
2683      * this is because a memblk is the unit of the Sparse Memory design, which
2684      * allows discontinuous memory ranges (ex. NUMA), so here we should
2685      * traverse the memory block directory.
2686      */
2687     while ((de = readdir(dp)) != NULL) {
2688         GuestMemoryBlock *mem_blk;
2689 
2690         if ((strncmp(de->d_name, "memory", 6) != 0) ||
2691             !(de->d_type & DT_DIR)) {
2692             continue;
2693         }
2694 
2695         mem_blk = g_malloc0(sizeof *mem_blk);
2696         /* The d_name is "memoryXXX",  phys_index is block id, same as XXX */
2697         mem_blk->phys_index = strtoul(&de->d_name[6], NULL, 10);
2698         mem_blk->has_can_offline = true; /* lolspeak ftw */
2699         transfer_memory_block(mem_blk, true, NULL, &local_err);
2700         if (local_err) {
2701             break;
2702         }
2703 
2704         QAPI_LIST_APPEND(tail, mem_blk);
2705     }
2706 
2707     closedir(dp);
2708     if (local_err == NULL) {
2709         /* there's no guest with zero memory blocks */
2710         if (head == NULL) {
2711             error_setg(errp, "guest reported zero memory blocks!");
2712         }
2713         return head;
2714     }
2715 
2716     qapi_free_GuestMemoryBlockList(head);
2717     error_propagate(errp, local_err);
2718     return NULL;
2719 }
2720 
2721 GuestMemoryBlockResponseList *
2722 qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp)
2723 {
2724     GuestMemoryBlockResponseList *head, **tail;
2725     Error *local_err = NULL;
2726 
2727     head = NULL;
2728     tail = &head;
2729 
2730     while (mem_blks != NULL) {
2731         GuestMemoryBlockResponse *result;
2732         GuestMemoryBlock *current_mem_blk = mem_blks->value;
2733 
2734         result = g_malloc0(sizeof(*result));
2735         result->phys_index = current_mem_blk->phys_index;
2736         transfer_memory_block(current_mem_blk, false, result, &local_err);
2737         if (local_err) { /* should never happen */
2738             goto err;
2739         }
2740 
2741         QAPI_LIST_APPEND(tail, result);
2742         mem_blks = mem_blks->next;
2743     }
2744 
2745     return head;
2746 err:
2747     qapi_free_GuestMemoryBlockResponseList(head);
2748     error_propagate(errp, local_err);
2749     return NULL;
2750 }
2751 
2752 GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp)
2753 {
2754     Error *local_err = NULL;
2755     char *dirpath;
2756     int dirfd;
2757     char *buf;
2758     GuestMemoryBlockInfo *info;
2759 
2760     dirpath = g_strdup_printf("/sys/devices/system/memory/");
2761     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
2762     if (dirfd == -1) {
2763         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
2764         g_free(dirpath);
2765         return NULL;
2766     }
2767     g_free(dirpath);
2768 
2769     buf = g_malloc0(20);
2770     ga_read_sysfs_file(dirfd, "block_size_bytes", buf, 20, &local_err);
2771     close(dirfd);
2772     if (local_err) {
2773         g_free(buf);
2774         error_propagate(errp, local_err);
2775         return NULL;
2776     }
2777 
2778     info = g_new0(GuestMemoryBlockInfo, 1);
2779     info->size = strtol(buf, NULL, 16); /* the unit is bytes */
2780 
2781     g_free(buf);
2782 
2783     return info;
2784 }
2785 
2786 #else /* defined(__linux__) */
2787 
2788 void qmp_guest_suspend_disk(Error **errp)
2789 {
2790     error_setg(errp, QERR_UNSUPPORTED);
2791 }
2792 
2793 void qmp_guest_suspend_ram(Error **errp)
2794 {
2795     error_setg(errp, QERR_UNSUPPORTED);
2796 }
2797 
2798 void qmp_guest_suspend_hybrid(Error **errp)
2799 {
2800     error_setg(errp, QERR_UNSUPPORTED);
2801 }
2802 
2803 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
2804 {
2805     error_setg(errp, QERR_UNSUPPORTED);
2806     return NULL;
2807 }
2808 
2809 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
2810 {
2811     error_setg(errp, QERR_UNSUPPORTED);
2812     return -1;
2813 }
2814 
2815 void qmp_guest_set_user_password(const char *username,
2816                                  const char *password,
2817                                  bool crypted,
2818                                  Error **errp)
2819 {
2820     error_setg(errp, QERR_UNSUPPORTED);
2821 }
2822 
2823 GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
2824 {
2825     error_setg(errp, QERR_UNSUPPORTED);
2826     return NULL;
2827 }
2828 
2829 GuestMemoryBlockResponseList *
2830 qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp)
2831 {
2832     error_setg(errp, QERR_UNSUPPORTED);
2833     return NULL;
2834 }
2835 
2836 GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp)
2837 {
2838     error_setg(errp, QERR_UNSUPPORTED);
2839     return NULL;
2840 }
2841 
2842 #endif
2843 
2844 #ifdef HAVE_GETIFADDRS
2845 static GuestNetworkInterface *
2846 guest_find_interface(GuestNetworkInterfaceList *head,
2847                      const char *name)
2848 {
2849     for (; head; head = head->next) {
2850         if (strcmp(head->value->name, name) == 0) {
2851             return head->value;
2852         }
2853     }
2854 
2855     return NULL;
2856 }
2857 
2858 static int guest_get_network_stats(const char *name,
2859                        GuestNetworkInterfaceStat *stats)
2860 {
2861 #ifdef CONFIG_LINUX
2862     int name_len;
2863     char const *devinfo = "/proc/net/dev";
2864     FILE *fp;
2865     char *line = NULL, *colon;
2866     size_t n = 0;
2867     fp = fopen(devinfo, "r");
2868     if (!fp) {
2869         g_debug("failed to open network stats %s: %s", devinfo,
2870                 g_strerror(errno));
2871         return -1;
2872     }
2873     name_len = strlen(name);
2874     while (getline(&line, &n, fp) != -1) {
2875         long long dummy;
2876         long long rx_bytes;
2877         long long rx_packets;
2878         long long rx_errs;
2879         long long rx_dropped;
2880         long long tx_bytes;
2881         long long tx_packets;
2882         long long tx_errs;
2883         long long tx_dropped;
2884         char *trim_line;
2885         trim_line = g_strchug(line);
2886         if (trim_line[0] == '\0') {
2887             continue;
2888         }
2889         colon = strchr(trim_line, ':');
2890         if (!colon) {
2891             continue;
2892         }
2893         if (colon - name_len  == trim_line &&
2894            strncmp(trim_line, name, name_len) == 0) {
2895             if (sscanf(colon + 1,
2896                 "%lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld",
2897                   &rx_bytes, &rx_packets, &rx_errs, &rx_dropped,
2898                   &dummy, &dummy, &dummy, &dummy,
2899                   &tx_bytes, &tx_packets, &tx_errs, &tx_dropped,
2900                   &dummy, &dummy, &dummy, &dummy) != 16) {
2901                 continue;
2902             }
2903             stats->rx_bytes = rx_bytes;
2904             stats->rx_packets = rx_packets;
2905             stats->rx_errs = rx_errs;
2906             stats->rx_dropped = rx_dropped;
2907             stats->tx_bytes = tx_bytes;
2908             stats->tx_packets = tx_packets;
2909             stats->tx_errs = tx_errs;
2910             stats->tx_dropped = tx_dropped;
2911             fclose(fp);
2912             g_free(line);
2913             return 0;
2914         }
2915     }
2916     fclose(fp);
2917     g_free(line);
2918     g_debug("/proc/net/dev: Interface '%s' not found", name);
2919 #else /* !CONFIG_LINUX */
2920     g_debug("Network stats reporting available only for Linux");
2921 #endif /* !CONFIG_LINUX */
2922     return -1;
2923 }
2924 
2925 /*
2926  * Build information about guest interfaces
2927  */
2928 GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
2929 {
2930     GuestNetworkInterfaceList *head = NULL, **tail = &head;
2931     struct ifaddrs *ifap, *ifa;
2932 
2933     if (getifaddrs(&ifap) < 0) {
2934         error_setg_errno(errp, errno, "getifaddrs failed");
2935         goto error;
2936     }
2937 
2938     for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
2939         GuestNetworkInterface *info;
2940         GuestIpAddressList **address_tail;
2941         GuestIpAddress *address_item = NULL;
2942         GuestNetworkInterfaceStat *interface_stat = NULL;
2943         char addr4[INET_ADDRSTRLEN];
2944         char addr6[INET6_ADDRSTRLEN];
2945         int sock;
2946         struct ifreq ifr;
2947         unsigned char *mac_addr;
2948         void *p;
2949 
2950         g_debug("Processing %s interface", ifa->ifa_name);
2951 
2952         info = guest_find_interface(head, ifa->ifa_name);
2953 
2954         if (!info) {
2955             info = g_malloc0(sizeof(*info));
2956             info->name = g_strdup(ifa->ifa_name);
2957 
2958             QAPI_LIST_APPEND(tail, info);
2959         }
2960 
2961         if (!info->has_hardware_address) {
2962             /* we haven't obtained HW address yet */
2963             sock = socket(PF_INET, SOCK_STREAM, 0);
2964             if (sock == -1) {
2965                 error_setg_errno(errp, errno, "failed to create socket");
2966                 goto error;
2967             }
2968 
2969             memset(&ifr, 0, sizeof(ifr));
2970             pstrcpy(ifr.ifr_name, IF_NAMESIZE, info->name);
2971             if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) {
2972                 /*
2973                  * We can't get the hw addr of this interface, but that's not a
2974                  * fatal error. Don't set info->hardware_address, but keep
2975                  * going.
2976                  */
2977                 if (errno == EADDRNOTAVAIL) {
2978                     /* The interface doesn't have a hw addr (e.g. loopback). */
2979                     g_debug("failed to get MAC address of %s: %s",
2980                             ifa->ifa_name, strerror(errno));
2981                 } else{
2982                     g_warning("failed to get MAC address of %s: %s",
2983                               ifa->ifa_name, strerror(errno));
2984                 }
2985 
2986             } else {
2987 #ifdef CONFIG_SOLARIS
2988                 mac_addr = (unsigned char *) &ifr.ifr_addr.sa_data;
2989 #else
2990                 mac_addr = (unsigned char *) &ifr.ifr_hwaddr.sa_data;
2991 #endif
2992                 info->hardware_address =
2993                     g_strdup_printf("%02x:%02x:%02x:%02x:%02x:%02x",
2994                                     (int) mac_addr[0], (int) mac_addr[1],
2995                                     (int) mac_addr[2], (int) mac_addr[3],
2996                                     (int) mac_addr[4], (int) mac_addr[5]);
2997 
2998                 info->has_hardware_address = true;
2999             }
3000             close(sock);
3001         }
3002 
3003         if (ifa->ifa_addr &&
3004             ifa->ifa_addr->sa_family == AF_INET) {
3005             /* interface with IPv4 address */
3006             p = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
3007             if (!inet_ntop(AF_INET, p, addr4, sizeof(addr4))) {
3008                 error_setg_errno(errp, errno, "inet_ntop failed");
3009                 goto error;
3010             }
3011 
3012             address_item = g_malloc0(sizeof(*address_item));
3013             address_item->ip_address = g_strdup(addr4);
3014             address_item->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV4;
3015 
3016             if (ifa->ifa_netmask) {
3017                 /* Count the number of set bits in netmask.
3018                  * This is safe as '1' and '0' cannot be shuffled in netmask. */
3019                 p = &((struct sockaddr_in *)ifa->ifa_netmask)->sin_addr;
3020                 address_item->prefix = ctpop32(((uint32_t *) p)[0]);
3021             }
3022         } else if (ifa->ifa_addr &&
3023                    ifa->ifa_addr->sa_family == AF_INET6) {
3024             /* interface with IPv6 address */
3025             p = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
3026             if (!inet_ntop(AF_INET6, p, addr6, sizeof(addr6))) {
3027                 error_setg_errno(errp, errno, "inet_ntop failed");
3028                 goto error;
3029             }
3030 
3031             address_item = g_malloc0(sizeof(*address_item));
3032             address_item->ip_address = g_strdup(addr6);
3033             address_item->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV6;
3034 
3035             if (ifa->ifa_netmask) {
3036                 /* Count the number of set bits in netmask.
3037                  * This is safe as '1' and '0' cannot be shuffled in netmask. */
3038                 p = &((struct sockaddr_in6 *)ifa->ifa_netmask)->sin6_addr;
3039                 address_item->prefix =
3040                     ctpop32(((uint32_t *) p)[0]) +
3041                     ctpop32(((uint32_t *) p)[1]) +
3042                     ctpop32(((uint32_t *) p)[2]) +
3043                     ctpop32(((uint32_t *) p)[3]);
3044             }
3045         }
3046 
3047         if (!address_item) {
3048             continue;
3049         }
3050 
3051         address_tail = &info->ip_addresses;
3052         while (*address_tail) {
3053             address_tail = &(*address_tail)->next;
3054         }
3055         QAPI_LIST_APPEND(address_tail, address_item);
3056 
3057         info->has_ip_addresses = true;
3058 
3059         if (!info->has_statistics) {
3060             interface_stat = g_malloc0(sizeof(*interface_stat));
3061             if (guest_get_network_stats(info->name, interface_stat) == -1) {
3062                 info->has_statistics = false;
3063                 g_free(interface_stat);
3064             } else {
3065                 info->statistics = interface_stat;
3066                 info->has_statistics = true;
3067             }
3068         }
3069     }
3070 
3071     freeifaddrs(ifap);
3072     return head;
3073 
3074 error:
3075     freeifaddrs(ifap);
3076     qapi_free_GuestNetworkInterfaceList(head);
3077     return NULL;
3078 }
3079 
3080 #else
3081 
3082 GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
3083 {
3084     error_setg(errp, QERR_UNSUPPORTED);
3085     return NULL;
3086 }
3087 
3088 #endif /* HAVE_GETIFADDRS */
3089 
3090 #if !defined(CONFIG_FSFREEZE)
3091 
3092 GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
3093 {
3094     error_setg(errp, QERR_UNSUPPORTED);
3095     return NULL;
3096 }
3097 
3098 GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp)
3099 {
3100     error_setg(errp, QERR_UNSUPPORTED);
3101 
3102     return 0;
3103 }
3104 
3105 int64_t qmp_guest_fsfreeze_freeze(Error **errp)
3106 {
3107     error_setg(errp, QERR_UNSUPPORTED);
3108 
3109     return 0;
3110 }
3111 
3112 int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints,
3113                                        strList *mountpoints,
3114                                        Error **errp)
3115 {
3116     error_setg(errp, QERR_UNSUPPORTED);
3117 
3118     return 0;
3119 }
3120 
3121 int64_t qmp_guest_fsfreeze_thaw(Error **errp)
3122 {
3123     error_setg(errp, QERR_UNSUPPORTED);
3124 
3125     return 0;
3126 }
3127 
3128 GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
3129 {
3130     error_setg(errp, QERR_UNSUPPORTED);
3131     return NULL;
3132 }
3133 
3134 #endif /* CONFIG_FSFREEZE */
3135 
3136 #if !defined(CONFIG_FSTRIM)
3137 GuestFilesystemTrimResponse *
3138 qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
3139 {
3140     error_setg(errp, QERR_UNSUPPORTED);
3141     return NULL;
3142 }
3143 #endif
3144 
3145 /* add unsupported commands to the blacklist */
3146 GList *ga_command_blacklist_init(GList *blacklist)
3147 {
3148 #if !defined(__linux__)
3149     {
3150         const char *list[] = {
3151             "guest-suspend-disk", "guest-suspend-ram",
3152             "guest-suspend-hybrid", "guest-get-vcpus", "guest-set-vcpus",
3153             "guest-get-memory-blocks", "guest-set-memory-blocks",
3154             "guest-get-memory-block-size", "guest-get-memory-block-info",
3155             NULL};
3156         char **p = (char **)list;
3157 
3158         while (*p) {
3159             blacklist = g_list_append(blacklist, g_strdup(*p++));
3160         }
3161     }
3162 #endif
3163 
3164 #if !defined(HAVE_GETIFADDRS)
3165     blacklist = g_list_append(blacklist,
3166                               g_strdup("guest-network-get-interfaces"));
3167 #endif
3168 
3169 #if !defined(CONFIG_FSFREEZE)
3170     {
3171         const char *list[] = {
3172             "guest-get-fsinfo", "guest-fsfreeze-status",
3173             "guest-fsfreeze-freeze", "guest-fsfreeze-freeze-list",
3174             "guest-fsfreeze-thaw", "guest-get-fsinfo",
3175             "guest-get-disks", NULL};
3176         char **p = (char **)list;
3177 
3178         while (*p) {
3179             blacklist = g_list_append(blacklist, g_strdup(*p++));
3180         }
3181     }
3182 #endif
3183 
3184 #if !defined(CONFIG_FSTRIM)
3185     blacklist = g_list_append(blacklist, g_strdup("guest-fstrim"));
3186 #endif
3187 
3188     blacklist = g_list_append(blacklist, g_strdup("guest-get-devices"));
3189 
3190     return blacklist;
3191 }
3192 
3193 /* register init/cleanup routines for stateful command groups */
3194 void ga_command_state_init(GAState *s, GACommandState *cs)
3195 {
3196 #if defined(CONFIG_FSFREEZE)
3197     ga_command_state_add(cs, NULL, guest_fsfreeze_cleanup);
3198 #endif
3199 }
3200 
3201 #ifdef HAVE_UTMPX
3202 
/* Number of microseconds in one second. */
#define QGA_MICRO_SECOND_TO_SECOND 1000000

/*
 * Convert the ut_tv timestamp of @user_info into a single value in seconds,
 * with the microsecond part as the fraction.
 */
static double ga_get_login_time(struct utmpx *user_info)
{
    double whole = (double)user_info->ut_tv.tv_sec;
    double fraction = (double)user_info->ut_tv.tv_usec;

    fraction = fraction / QGA_MICRO_SECOND_TO_SECOND;

    return whole + fraction;
}
3212 
3213 GuestUserList *qmp_guest_get_users(Error **errp)
3214 {
3215     GHashTable *cache = NULL;
3216     GuestUserList *head = NULL, **tail = &head;
3217     struct utmpx *user_info = NULL;
3218     gpointer value = NULL;
3219     GuestUser *user = NULL;
3220     double login_time = 0;
3221 
3222     cache = g_hash_table_new(g_str_hash, g_str_equal);
3223     setutxent();
3224 
3225     for (;;) {
3226         user_info = getutxent();
3227         if (user_info == NULL) {
3228             break;
3229         } else if (user_info->ut_type != USER_PROCESS) {
3230             continue;
3231         } else if (g_hash_table_contains(cache, user_info->ut_user)) {
3232             value = g_hash_table_lookup(cache, user_info->ut_user);
3233             user = (GuestUser *)value;
3234             login_time = ga_get_login_time(user_info);
3235             /* We're ensuring the earliest login time to be sent */
3236             if (login_time < user->login_time) {
3237                 user->login_time = login_time;
3238             }
3239             continue;
3240         }
3241 
3242         user = g_new0(GuestUser, 1);
3243         user->user = g_strdup(user_info->ut_user);
3244         user->login_time = ga_get_login_time(user_info);
3245 
3246         g_hash_table_insert(cache, user->user, user);
3247 
3248         QAPI_LIST_APPEND(tail, user);
3249     }
3250     endutxent();
3251     g_hash_table_destroy(cache);
3252     return head;
3253 }
3254 
3255 #else
3256 
3257 GuestUserList *qmp_guest_get_users(Error **errp)
3258 {
3259     error_setg(errp, QERR_UNSUPPORTED);
3260     return NULL;
3261 }
3262 
3263 #endif
3264 
/* Replace escaped special characters with their real values. The replacement
 * is done in place -- the result is stored in the original string.
 *
 * - A value that does not start with a single or double quote is cut at the
 *   first space or semicolon.
 * - A quoted value has its quotes removed; inside it, a backslash followed
 *   by one of $ ' " \ ` is replaced by that character, while any other
 *   backslash is kept literally. Processing stops at the closing quote (or
 *   at the end of the string if the closing quote is missing).
 */
static void ga_osrelease_replace_special(gchar *value)
{
    gchar *p, *p2, quote;

    /* Trim the string at first space or semicolon if it is not enclosed in
     * single or double quotes. */
    if ((value[0] != '"') && (value[0] != '\'')) {
        p = strchr(value, ' ');
        if (p != NULL) {
            *p = 0;
        }
        p = strchr(value, ';');
        if (p != NULL) {
            *p = 0;
        }
        return;
    }

    quote = value[0];
    p2 = value;      /* write position */
    p = value + 1;   /* read position, just past the opening quote */
    while (*p != 0) {
        if (*p == '\\') {
            p++;
            switch (*p) {
            case '$':
            case '\'':
            case '"':
            case '\\':
            case '`':
                /* Drop the backslash, copy the escaped character below. */
                break;
            default:
                /* Keep literal backslash followed by whatever is there */
                p--;
                break;
            }
        } else if (*p == quote) {
            break;
        }
        *(p2++) = *(p++);
    }

    /* Terminate the result even when the closing quote is missing. */
    *p2 = 0;
}
3311 
3312 static GKeyFile *ga_parse_osrelease(const char *fname)
3313 {
3314     gchar *content = NULL;
3315     gchar *content2 = NULL;
3316     GError *err = NULL;
3317     GKeyFile *keys = g_key_file_new();
3318     const char *group = "[os-release]\n";
3319 
3320     if (!g_file_get_contents(fname, &content, NULL, &err)) {
3321         slog("failed to read '%s', error: %s", fname, err->message);
3322         goto fail;
3323     }
3324 
3325     if (!g_utf8_validate(content, -1, NULL)) {
3326         slog("file is not utf-8 encoded: %s", fname);
3327         goto fail;
3328     }
3329     content2 = g_strdup_printf("%s%s", group, content);
3330 
3331     if (!g_key_file_load_from_data(keys, content2, -1, G_KEY_FILE_NONE,
3332                                    &err)) {
3333         slog("failed to parse file '%s', error: %s", fname, err->message);
3334         goto fail;
3335     }
3336 
3337     g_free(content);
3338     g_free(content2);
3339     return keys;
3340 
3341 fail:
3342     g_error_free(err);
3343     g_free(content);
3344     g_free(content2);
3345     g_key_file_free(keys);
3346     return NULL;
3347 }
3348 
3349 GuestOSInfo *qmp_guest_get_osinfo(Error **errp)
3350 {
3351     GuestOSInfo *info = NULL;
3352     struct utsname kinfo;
3353     GKeyFile *osrelease = NULL;
3354     const char *qga_os_release = g_getenv("QGA_OS_RELEASE");
3355 
3356     info = g_new0(GuestOSInfo, 1);
3357 
3358     if (uname(&kinfo) != 0) {
3359         error_setg_errno(errp, errno, "uname failed");
3360     } else {
3361         info->has_kernel_version = true;
3362         info->kernel_version = g_strdup(kinfo.version);
3363         info->has_kernel_release = true;
3364         info->kernel_release = g_strdup(kinfo.release);
3365         info->has_machine = true;
3366         info->machine = g_strdup(kinfo.machine);
3367     }
3368 
3369     if (qga_os_release != NULL) {
3370         osrelease = ga_parse_osrelease(qga_os_release);
3371     } else {
3372         osrelease = ga_parse_osrelease("/etc/os-release");
3373         if (osrelease == NULL) {
3374             osrelease = ga_parse_osrelease("/usr/lib/os-release");
3375         }
3376     }
3377 
3378     if (osrelease != NULL) {
3379         char *value;
3380 
3381 #define GET_FIELD(field, osfield) do { \
3382     value = g_key_file_get_value(osrelease, "os-release", osfield, NULL); \
3383     if (value != NULL) { \
3384         ga_osrelease_replace_special(value); \
3385         info->has_ ## field = true; \
3386         info->field = value; \
3387     } \
3388 } while (0)
3389         GET_FIELD(id, "ID");
3390         GET_FIELD(name, "NAME");
3391         GET_FIELD(pretty_name, "PRETTY_NAME");
3392         GET_FIELD(version, "VERSION");
3393         GET_FIELD(version_id, "VERSION_ID");
3394         GET_FIELD(variant, "VARIANT");
3395         GET_FIELD(variant_id, "VARIANT_ID");
3396 #undef GET_FIELD
3397 
3398         g_key_file_free(osrelease);
3399     }
3400 
3401     return info;
3402 }
3403 
3404 GuestDeviceInfoList *qmp_guest_get_devices(Error **errp)
3405 {
3406     error_setg(errp, QERR_UNSUPPORTED);
3407 
3408     return NULL;
3409 }
3410 
3411 #ifndef HOST_NAME_MAX
3412 # ifdef _POSIX_HOST_NAME_MAX
3413 #  define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
3414 # else
3415 #  define HOST_NAME_MAX 255
3416 # endif
3417 #endif
3418 
3419 char *qga_get_host_name(Error **errp)
3420 {
3421     long len = -1;
3422     g_autofree char *hostname = NULL;
3423 
3424 #ifdef _SC_HOST_NAME_MAX
3425     len = sysconf(_SC_HOST_NAME_MAX);
3426 #endif /* _SC_HOST_NAME_MAX */
3427 
3428     if (len < 0) {
3429         len = HOST_NAME_MAX;
3430     }
3431 
3432     /* Unfortunately, gethostname() below does not guarantee a
3433      * NULL terminated string. Therefore, allocate one byte more
3434      * to be sure. */
3435     hostname = g_new0(char, len + 1);
3436 
3437     if (gethostname(hostname, len) < 0) {
3438         error_setg_errno(errp, errno,
3439                          "cannot get hostname");
3440         return NULL;
3441     }
3442 
3443     return g_steal_pointer(&hostname);
3444 }
3445