xref: /qemu/util/osdep.c (revision 5df022cf)
1 /*
2  * QEMU low level functions
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "qapi/error.h"
26 
27 /* Needed early for CONFIG_BSD etc. */
28 
29 #ifdef CONFIG_SOLARIS
30 #include <sys/statvfs.h>
31 /* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for
32    discussion about Solaris header problems */
33 extern int madvise(char *, size_t, int);
34 #endif
35 
36 #include <dirent.h>
37 #include "qemu-common.h"
38 #include "qemu/cutils.h"
39 #include "qemu/sockets.h"
40 #include "qemu/error-report.h"
41 #include "qemu/madvise.h"
42 #include "qemu/mprotect.h"
43 #include "qemu/hw-version.h"
44 #include "monitor/monitor.h"
45 
/* Cached FIPS state: written by fips_set_state(), read by fips_get_state(). */
static bool fips_enabled = false;

/*
 * Hardware version string returned by qemu_hw_version().  Starts out as
 * QEMU_HW_VERSION and can be replaced through qemu_set_hw_version().
 */
static const char *hw_version = QEMU_HW_VERSION;
49 
/*
 * Set the TCP_CORK option of socket @fd to @v.
 *
 * When the host headers do not provide both SOL_TCP and TCP_CORK the
 * request is silently ignored and 0 is returned.  Otherwise the result
 * of qemu_setsockopt() is passed back unchanged.
 */
int socket_set_cork(int fd, int v)
{
#if !defined(SOL_TCP) || !defined(TCP_CORK)
    return 0;
#else
    return qemu_setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
#endif
}
58 
/*
 * Turn on the TCP_NODELAY option for socket @fd.
 *
 * Returns whatever qemu_setsockopt() returns.
 */
int socket_set_nodelay(int fd)
{
    int on = 1;

    return qemu_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
}
64 
65 int qemu_madvise(void *addr, size_t len, int advice)
66 {
67     if (advice == QEMU_MADV_INVALID) {
68         errno = EINVAL;
69         return -1;
70     }
71 #if defined(CONFIG_MADVISE)
72     return madvise(addr, len, advice);
73 #elif defined(CONFIG_POSIX_MADVISE)
74     return posix_madvise(addr, len, advice);
75 #else
76     errno = EINVAL;
77     return -1;
78 #endif
79 }
80 
81 static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
82 {
83     g_assert(!((uintptr_t)addr & ~qemu_real_host_page_mask));
84     g_assert(!(size & ~qemu_real_host_page_mask));
85 
86 #ifdef _WIN32
87     DWORD old_protect;
88 
89     if (!VirtualProtect(addr, size, prot, &old_protect)) {
90         g_autofree gchar *emsg = g_win32_error_message(GetLastError());
91         error_report("%s: VirtualProtect failed: %s", __func__, emsg);
92         return -1;
93     }
94     return 0;
95 #else
96     if (mprotect(addr, size, prot)) {
97         error_report("%s: mprotect failed: %s", __func__, strerror(errno));
98         return -1;
99     }
100     return 0;
101 #endif
102 }
103 
/*
 * Make @size bytes at @addr readable and writable.
 *
 * Returns the result of qemu_mprotect__osdep(): 0 on success, -1 on failure.
 */
int qemu_mprotect_rw(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_READWRITE;
#else
    const int prot = PROT_READ | PROT_WRITE;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
112 
/*
 * Make @size bytes at @addr readable, writable and executable.
 *
 * Returns the result of qemu_mprotect__osdep(): 0 on success, -1 on failure.
 */
int qemu_mprotect_rwx(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_EXECUTE_READWRITE;
#else
    const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
121 
/*
 * Remove all access rights from @size bytes at @addr.
 *
 * Returns the result of qemu_mprotect__osdep(): 0 on success, -1 on failure.
 */
int qemu_mprotect_none(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_NOACCESS;
#else
    const int prot = PROT_NONE;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
130 
131 #ifndef _WIN32
132 
/*
 * fcntl() commands used to set and to query file locks.  Both start out
 * as -1, meaning "not probed yet"; qemu_probe_lock_ops() fills them in.
 */
static int fcntl_op_setlk = -1;
static int fcntl_op_getlk = -1;
135 
/*
 * Duplicate @fd and apply @flags to the duplicate.
 *
 * The O_SYNC setting of @flags has to match that of the duplicate; a
 * mismatch fails with EINVAL.  The remaining flags are applied with
 * fcntl(F_SETFL), and the file is truncated to zero length if @flags
 * contains O_TRUNC or both O_CREAT and O_EXCL.
 *
 * Returns: the new descriptor on success.  On failure the duplicate (if
 * any) is closed and -1 is returned with errno describing the failure.
 */
int qemu_dup_flags(int fd, int flags)
{
    const int create_excl = O_CREAT | O_EXCL;
    int saved_errno;
    int cur_flags;
    int newfd;

    newfd = qemu_dup(fd);
    if (newfd == -1) {
        /* Nothing to clean up, errno is already set by qemu_dup() */
        return -1;
    }

    cur_flags = fcntl(newfd, F_GETFL);
    if (cur_flags == -1) {
        goto close_and_fail;
    }

    /* O_SYNC must agree between the request and the descriptor */
    if ((flags ^ cur_flags) & O_SYNC) {
        errno = EINVAL;
        goto close_and_fail;
    }

    /* Apply whatever fcntl is able to set or clear */
    if (fcntl(newfd, F_SETFL, flags) == -1) {
        goto close_and_fail;
    }

    /* Mirror open(): these flag combinations would have truncated the file */
    if ((flags & O_TRUNC) || (flags & create_excl) == create_excl) {
        if (ftruncate(newfd, 0) == -1) {
            goto close_and_fail;
        }
    }

    return newfd;

close_and_fail:
    /* close() may clobber errno; keep the value of the original failure */
    saved_errno = errno;
    close(newfd);
    errno = saved_errno;
    return -1;
}
183 
/*
 * Duplicate @fd so that the duplicate has the close-on-exec flag set.
 *
 * Uses fcntl(F_DUPFD_CLOEXEC) where available; otherwise falls back to
 * dup() followed by qemu_set_cloexec().
 *
 * Returns: the new descriptor, or -1 on failure.
 */
int qemu_dup(int fd)
{
#ifdef F_DUPFD_CLOEXEC
    return fcntl(fd, F_DUPFD_CLOEXEC, 0);
#else
    int newfd = dup(fd);

    if (newfd != -1) {
        qemu_set_cloexec(newfd);
    }
    return newfd;
#endif
}
197 
/*
 * Parse the fd set id in @param.
 *
 * Thin wrapper that hands @param to qemu_parse_fd() and returns its result.
 */
static int qemu_parse_fdset(const char *param)
{
    int fdset_id = qemu_parse_fd(param);

    return fdset_id;
}
202 
203 static void qemu_probe_lock_ops(void)
204 {
205     if (fcntl_op_setlk == -1) {
206 #ifdef F_OFD_SETLK
207         int fd;
208         int ret;
209         struct flock fl = {
210             .l_whence = SEEK_SET,
211             .l_start  = 0,
212             .l_len    = 0,
213             .l_type   = F_WRLCK,
214         };
215 
216         fd = open("/dev/null", O_RDWR);
217         if (fd < 0) {
218             fprintf(stderr,
219                     "Failed to open /dev/null for OFD lock probing: %s\n",
220                     strerror(errno));
221             fcntl_op_setlk = F_SETLK;
222             fcntl_op_getlk = F_GETLK;
223             return;
224         }
225         ret = fcntl(fd, F_OFD_GETLK, &fl);
226         close(fd);
227         if (!ret) {
228             fcntl_op_setlk = F_OFD_SETLK;
229             fcntl_op_getlk = F_OFD_GETLK;
230         } else {
231             fcntl_op_setlk = F_SETLK;
232             fcntl_op_getlk = F_GETLK;
233         }
234 #else
235         fcntl_op_setlk = F_SETLK;
236         fcntl_op_getlk = F_GETLK;
237 #endif
238     }
239 }
240 
/*
 * Report whether the OFD lock commands were selected by
 * qemu_probe_lock_ops().  Always false when F_OFD_SETLK is not available
 * at build time.
 */
bool qemu_has_ofd_lock(void)
{
    bool have_ofd = false;

    qemu_probe_lock_ops();
#ifdef F_OFD_SETLK
    have_ofd = (fcntl_op_setlk == F_OFD_SETLK);
#endif
    return have_ofd;
}
250 
251 static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type)
252 {
253     int ret;
254     struct flock fl = {
255         .l_whence = SEEK_SET,
256         .l_start  = start,
257         .l_len    = len,
258         .l_type   = fl_type,
259     };
260     qemu_probe_lock_ops();
261     do {
262         ret = fcntl(fd, fcntl_op_setlk, &fl);
263     } while (ret == -1 && errno == EINTR);
264     return ret == -1 ? -errno : 0;
265 }
266 
/*
 * Lock @len bytes at offset @start of @fd: a write lock if @exclusive is
 * true, a read lock otherwise.
 *
 * Returns: 0 on success, -errno on failure.
 */
int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
{
    int lock_type = exclusive ? F_WRLCK : F_RDLCK;

    return qemu_lock_fcntl(fd, start, len, lock_type);
}
271 
/*
 * Release the lock on @len bytes at offset @start of @fd.
 *
 * Returns: 0 on success, -errno on failure.
 */
int qemu_unlock_fd(int fd, int64_t start, int64_t len)
{
    const int lock_type = F_UNLCK;

    return qemu_lock_fcntl(fd, start, len, lock_type);
}
276 
277 int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
278 {
279     int ret;
280     struct flock fl = {
281         .l_whence = SEEK_SET,
282         .l_start  = start,
283         .l_len    = len,
284         .l_type   = exclusive ? F_WRLCK : F_RDLCK,
285     };
286     qemu_probe_lock_ops();
287     ret = fcntl(fd, fcntl_op_getlk, &fl);
288     if (ret == -1) {
289         return -errno;
290     } else {
291         return fl.l_type == F_UNLCK ? 0 : -EAGAIN;
292     }
293 }
294 #endif
295 
/*
 * open() @name with @flags and @mode so that the resulting descriptor has
 * close-on-exec set: through O_CLOEXEC where it exists, otherwise through
 * qemu_set_cloexec() after a successful open().
 *
 * Returns: the result of open().
 */
static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
{
#ifdef O_CLOEXEC
    return open(name, flags | O_CLOEXEC, mode);
#else
    int fd = open(name, flags, mode);

    if (fd >= 0) {
        qemu_set_cloexec(fd);
    }
    return fd;
#endif
}
309 
310 /*
311  * Opens a file with FD_CLOEXEC set
312  */
313 static int
314 qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
315 {
316     int ret;
317 
318 #ifndef _WIN32
319     const char *fdset_id_str;
320 
321     /* Attempt dup of fd from fd set */
322     if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
323         int64_t fdset_id;
324         int dupfd;
325 
326         fdset_id = qemu_parse_fdset(fdset_id_str);
327         if (fdset_id == -1) {
328             error_setg(errp, "Could not parse fdset %s", name);
329             errno = EINVAL;
330             return -1;
331         }
332 
333         dupfd = monitor_fdset_dup_fd_add(fdset_id, flags);
334         if (dupfd == -1) {
335             error_setg_errno(errp, errno, "Could not dup FD for %s flags %x",
336                              name, flags);
337             return -1;
338         }
339 
340         return dupfd;
341     }
342 #endif
343 
344     ret = qemu_open_cloexec(name, flags, mode);
345 
346     if (ret == -1) {
347         const char *action = flags & O_CREAT ? "create" : "open";
348 #ifdef O_DIRECT
349         /* Give more helpful error message for O_DIRECT */
350         if (errno == EINVAL && (flags & O_DIRECT)) {
351             ret = open(name, flags & ~O_DIRECT, mode);
352             if (ret != -1) {
353                 close(ret);
354                 error_setg(errp, "Could not %s '%s': "
355                            "filesystem does not support O_DIRECT",
356                            action, name);
357                 errno = EINVAL; /* restore first open()'s errno */
358                 return -1;
359             }
360         }
361 #endif /* O_DIRECT */
362         error_setg_errno(errp, errno, "Could not %s '%s'",
363                          action, name);
364     }
365 
366     return ret;
367 }
368 
369 
370 int qemu_open(const char *name, int flags, Error **errp)
371 {
372     assert(!(flags & O_CREAT));
373 
374     return qemu_open_internal(name, flags, 0, errp);
375 }
376 
377 
378 int qemu_create(const char *name, int flags, mode_t mode, Error **errp)
379 {
380     assert(!(flags & O_CREAT));
381 
382     return qemu_open_internal(name, flags | O_CREAT, mode, errp);
383 }
384 
385 
/*
 * open(2)-style variadic front end to qemu_open_internal().
 *
 * The optional third argument (the creation mode) is only read when
 * @flags contains O_CREAT; otherwise a mode of 0 is used.  No Error
 * object is produced.  When the open fails with EINVAL and O_DIRECT was
 * requested, a hint is printed through error_report().
 *
 * Returns: the descriptor, or -1 on failure.
 */
int qemu_open_old(const char *name, int flags, ...)
{
    mode_t mode = 0;
    int fd;

    if (flags & O_CREAT) {
        va_list ap;

        va_start(ap, flags);
        /* mode_t is fetched as int, as the original code does */
        mode = va_arg(ap, int);
        va_end(ap);
    }

    fd = qemu_open_internal(name, flags, mode, NULL);
    if (fd != -1) {
        return fd;
    }

#ifdef O_DIRECT
    if (errno == EINVAL && (flags & O_DIRECT)) {
        error_report("file system may not support O_DIRECT");
        errno = EINVAL; /* in case it was clobbered */
    }
#endif /* O_DIRECT */

    return fd;
}
409 
/*
 * Close @fd.
 *
 * If monitor_fdset_dup_fd_find() knows @fd as a duplicate taken from an
 * fd set, a successful close() is followed by
 * monitor_fdset_dup_fd_remove() for that fd.
 *
 * Returns: the result of close().
 */
int qemu_close(int fd)
{
    /* The lookup has to happen before the descriptor is closed */
    const int64_t fdset_id = monitor_fdset_dup_fd_find(fd);
    int rc = close(fd);

    if (fdset_id != -1 && rc == 0) {
        monitor_fdset_dup_fd_remove(fd);
    }

    return rc;
}
429 
/*
 * Remove @name from the filesystem.
 *
 * Names starting with "/dev/fdset/" are left alone and reported as
 * success.
 *
 * Returns: 0 on success; on error -1 is returned and errno is set by
 * unlink().
 */
int qemu_unlink(const char *name)
{
    return g_str_has_prefix(name, "/dev/fdset/") ? 0 : unlink(name);
}
444 
/*
 * A variant of write(2) which handles partial writes.
 *
 * Keeps calling write() until all @count bytes of @buf have been written
 * to @fd or an error other than EINTR occurs.
 *
 * Returns the number of bytes transferred.  If that is fewer than @count,
 * errno holds the error of the write() that failed.
 *
 * This function does not work with non-blocking fds.
 * Either way of handling them would be bad:
 *   - returning a short write (then the name is wrong)
 *   - busy waiting by adding (errno == EAGAIN) to the loop
 */
ssize_t qemu_write_full(int fd, const void *buf, size_t count)
{
    /* Byte pointer: arithmetic on void * is not valid ISO C */
    const char *cursor = buf;
    ssize_t total = 0;

    while (count) {
        ssize_t ret = write(fd, cursor, count);

        if (ret < 0) {
            if (errno == EINTR) {
                /* Interrupted before anything was written: retry */
                continue;
            }
            break;
        }

        count -= ret;
        cursor += ret;
        total += ret;
    }

    return total;
}
476 
/*
 * Create a socket whose descriptor has FD_CLOEXEC set.
 *
 * With SOCK_CLOEXEC available the flag is requested atomically; only if
 * that attempt fails with EINVAL is the socket created without the flag
 * and qemu_set_cloexec() applied afterwards.
 *
 * Returns: the result of socket().
 */
int qemu_socket(int domain, int type, int protocol)
{
    int sock;

#ifdef SOCK_CLOEXEC
    sock = socket(domain, type | SOCK_CLOEXEC, protocol);
    if (!(sock == -1 && errno == EINVAL)) {
        /* Success, or a failure not caused by the SOCK_CLOEXEC flag */
        return sock;
    }
#endif

    sock = socket(domain, type, protocol);
    if (sock < 0) {
        return sock;
    }

    qemu_set_cloexec(sock);
    return sock;
}
497 
/*
 * Accept a connection on @s; the new descriptor has FD_CLOEXEC set.
 *
 * With CONFIG_ACCEPT4 the flag is requested through accept4(); only if
 * that fails with ENOSYS is plain accept() used, followed by
 * qemu_set_cloexec().
 *
 * Returns: the result of accept4()/accept().
 */
int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
{
    int conn;

#ifdef CONFIG_ACCEPT4
    conn = accept4(s, addr, addrlen, SOCK_CLOEXEC);
    if (!(conn == -1 && errno == ENOSYS)) {
        /* Success, or a failure other than "accept4 not implemented" */
        return conn;
    }
#endif

    conn = accept(s, addr, addrlen);
    if (conn < 0) {
        return conn;
    }

    qemu_set_cloexec(conn);
    return conn;
}
518 
/*
 * Replace the hardware version string returned by qemu_hw_version().
 *
 * Only the pointer is stored, no copy is made, so @version has to stay
 * valid for as long as qemu_hw_version() may be called.
 */
void qemu_set_hw_version(const char *version)
{
    hw_version = version;
}
523 
/*
 * Return the current hardware version string: QEMU_HW_VERSION unless it
 * was replaced through qemu_set_hw_version().
 */
const char *qemu_hw_version(void)
{
    return hw_version;
}
528 
529 void fips_set_state(bool requested)
530 {
531 #ifdef __linux__
532     if (requested) {
533         FILE *fds = fopen("/proc/sys/crypto/fips_enabled", "r");
534         if (fds != NULL) {
535             fips_enabled = (fgetc(fds) == '1');
536             fclose(fds);
537         }
538     }
539 #else
540     fips_enabled = false;
541 #endif /* __linux__ */
542 
543 #ifdef _FIPS_DEBUG
544     fprintf(stderr, "FIPS mode %s (requested %s)\n",
545             (fips_enabled ? "enabled" : "disabled"),
546             (requested ? "enabled" : "disabled"));
547 #endif
548 }
549 
/* Return the FIPS state last recorded by fips_set_state() (initially false). */
bool fips_get_state(void)
{
    return fips_enabled;
}
554 
#ifdef _WIN32
/* atexit() handler registered by socket_init(): calls WSACleanup(). */
static void socket_cleanup(void)
{
    WSACleanup();
}
#endif
561 
/*
 * One-time socket layer initialisation.
 *
 * On Windows this calls WSAStartup() for version 2.2 and registers
 * socket_cleanup() with atexit(); a failing WSAStartup() is reported on
 * stderr.  On other hosts there is nothing to do.
 *
 * Returns: 0 on success, -1 if WSAStartup() failed.
 */
int socket_init(void)
{
#ifdef _WIN32
    WSADATA wsa_data;

    if (WSAStartup(MAKEWORD(2, 2), &wsa_data) != 0) {
        int err = WSAGetLastError();

        fprintf(stderr, "WSAStartup: %d\n", err);
        return -1;
    }
    atexit(socket_cleanup);
#endif
    return 0;
}
578 
579 
580 #ifndef CONFIG_IOVEC
/*
 * Helper implementing the readv()/writev() fallbacks with one read() or
 * write() call per iovec entry.
 *
 * @fd:       descriptor to transfer from/to
 * @iov:      array of @iov_cnt buffers
 * @iov_cnt:  number of entries in @iov; a negative count fails with EINVAL
 * @do_write: true to write() the buffers, false to read() into them
 *
 * Entries are processed in order.  Zero-length entries are skipped.  The
 * transfer stops at the first short read/write, so that the bytes counted
 * in the return value are always contiguous across the buffers; it also
 * stops when read()/write() returns 0.  Calls failing with EINTR are
 * retried.
 *
 * Returns: total number of bytes transferred, or -1 (errno set) if an
 * error occurred before any data was transferred.
 */
static ssize_t
readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
{
    ssize_t ret = 0;
    int i = 0;

    if (iov_cnt < 0) {
        errno = EINVAL;
        return -1;
    }

    while (i < iov_cnt) {
        size_t len = iov[i].iov_len;
        ssize_t r;

        if (len == 0) {
            /*
             * A zero-length read()/write() returns 0, which must not be
             * mistaken for end of file: just move to the next buffer.
             */
            i++;
            continue;
        }

        r = do_write
            ? write(fd, iov[i].iov_base, len)
            : read(fd, iov[i].iov_base, len);
        if (r > 0) {
            ret += r;
            if ((size_t)r < len) {
                /*
                 * Short transfer: continuing with the next buffer would
                 * leave a gap in the middle of the data.
                 */
                break;
            }
        } else if (!r) {
            break;
        } else if (errno == EINTR) {
            continue;
        } else {
            /* else it is some "other" error,
             * only return if there was no data processed. */
            if (ret == 0) {
                ret = -1;
            }
            break;
        }
        i++;
    }
    return ret;
}
609 
/*
 * Fallback readv() for builds without CONFIG_IOVEC: reads from @fd into
 * the @iov_cnt buffers of @iov by delegating to readv_writev().
 */
ssize_t
readv(int fd, const struct iovec *iov, int iov_cnt)
{
    return readv_writev(fd, iov, iov_cnt, false);
}
615 
/*
 * Fallback writev() for builds without CONFIG_IOVEC: writes the @iov_cnt
 * buffers of @iov to @fd by delegating to readv_writev().
 */
ssize_t
writev(int fd, const struct iovec *iov, int iov_cnt)
{
    return readv_writev(fd, iov, iov_cnt, true);
}
621 #endif
622 
/*
 * Return a heap copy of @dent made with g_memdup().
 *
 * The number of bytes copied is d_reclen where the platform provides it
 * and it is non-zero; otherwise it is the offset of d_name plus the
 * length of the name including its terminating NUL.
 */
struct dirent *
qemu_dirent_dup(struct dirent *dent)
{
#if defined _DIRENT_HAVE_D_RECLEN
    /* Avoid use of strlen() if platform supports d_reclen. */
    size_t nbytes = dent->d_reclen;
#else
    size_t nbytes = 0;
#endif

    /*
     * d_reclen may legitimately be zero (some drivers leave it unset),
     * so the portable computation is needed even when the field exists.
     */
    if (nbytes == 0) {
        nbytes = offsetof(struct dirent, d_name) + strlen(dent->d_name) + 1;
    }

    return g_memdup(dent, nbytes);
}
642