xref: /qemu/util/osdep.c (revision 35b64664)
1 /*
2  * QEMU low level functions
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "qapi/error.h"
26 
27 /* Needed early for CONFIG_BSD etc. */
28 
29 #ifdef CONFIG_SOLARIS
30 #include <sys/statvfs.h>
31 /* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for
32    discussion about Solaris header problems */
33 extern int madvise(char *, size_t, int);
34 #endif
35 
36 #include "qemu-common.h"
37 #include "qemu/cutils.h"
38 #include "qemu/sockets.h"
39 #include "qemu/error-report.h"
40 #include "qemu/madvise.h"
41 #include "qemu/mprotect.h"
42 #include "qemu/hw-version.h"
43 #include "monitor/monitor.h"
44 
45 static bool fips_enabled = false;
46 
47 static const char *hw_version = QEMU_HW_VERSION;
48 
/*
 * Set the TCP_CORK socket option on @fd to @v.
 *
 * When the host headers do not provide both SOL_TCP and TCP_CORK nothing is
 * done and success is reported.
 *
 * Returns the result of qemu_setsockopt(), or 0 when corking is unavailable.
 */
int socket_set_cork(int fd, int v)
{
    int rc = 0;

#if defined(SOL_TCP) && defined(TCP_CORK)
    rc = qemu_setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
#endif
    return rc;
}
57 
/*
 * Switch on TCP_NODELAY for the socket @fd.
 *
 * Returns whatever qemu_setsockopt() returns.
 */
int socket_set_nodelay(int fd)
{
    int enable = 1;

    return qemu_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
                           &enable, sizeof(enable));
}
63 
64 int qemu_madvise(void *addr, size_t len, int advice)
65 {
66     if (advice == QEMU_MADV_INVALID) {
67         errno = EINVAL;
68         return -1;
69     }
70 #if defined(CONFIG_MADVISE)
71     return madvise(addr, len, advice);
72 #elif defined(CONFIG_POSIX_MADVISE)
73     return posix_madvise(addr, len, advice);
74 #else
75     errno = EINVAL;
76     return -1;
77 #endif
78 }
79 
80 static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
81 {
82     g_assert(!((uintptr_t)addr & ~qemu_real_host_page_mask));
83     g_assert(!(size & ~qemu_real_host_page_mask));
84 
85 #ifdef _WIN32
86     DWORD old_protect;
87 
88     if (!VirtualProtect(addr, size, prot, &old_protect)) {
89         g_autofree gchar *emsg = g_win32_error_message(GetLastError());
90         error_report("%s: VirtualProtect failed: %s", __func__, emsg);
91         return -1;
92     }
93     return 0;
94 #else
95     if (mprotect(addr, size, prot)) {
96         error_report("%s: mprotect failed: %s", __func__, strerror(errno));
97         return -1;
98     }
99     return 0;
100 #endif
101 }
102 
/*
 * Make the @size bytes at @addr readable and writable.
 *
 * Returns the result of qemu_mprotect__osdep().
 */
int qemu_mprotect_rw(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_READWRITE;
#else
    const int prot = PROT_READ | PROT_WRITE;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
111 
/*
 * Make the @size bytes at @addr readable, writable and executable.
 *
 * Returns the result of qemu_mprotect__osdep().
 */
int qemu_mprotect_rwx(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_EXECUTE_READWRITE;
#else
    const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
120 
/*
 * Remove all access to the @size bytes at @addr.
 *
 * Returns the result of qemu_mprotect__osdep().
 */
int qemu_mprotect_none(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_NOACCESS;
#else
    const int prot = PROT_NONE;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
129 
130 #ifndef _WIN32
131 
/*
 * fcntl() commands used by the file locking helpers in this file.  Both hold
 * -1 until qemu_probe_lock_ops() has chosen between the OFD and the classic
 * F_GETLK/F_SETLK commands.
 */
static int fcntl_op_getlk = -1;
static int fcntl_op_setlk = -1;
134 
/*
 * Duplicate @fd and apply @flags to the duplicate.
 *
 * The duplicate must already agree with @flags on O_SYNC, otherwise the call
 * fails with EINVAL.  The remaining flags are applied with
 * fcntl(F_SETFL), and the file is truncated to zero length when @flags
 * contains O_TRUNC or both O_CREAT and O_EXCL.
 *
 * Returns the new descriptor, or -1 with errno set; on failure the
 * duplicate is closed again.
 */
int qemu_dup_flags(int fd, int flags)
{
    const int creat_excl = O_CREAT | O_EXCL;
    int saved_errno;
    int cur_flags;
    int newfd = qemu_dup(fd);

    if (newfd == -1) {
        /* Nothing to clean up; errno is the one left by qemu_dup() */
        return -1;
    }

    cur_flags = fcntl(newfd, F_GETFL);
    if (cur_flags == -1) {
        goto close_and_fail;
    }

    /* Refuse a request whose O_SYNC bit differs from the duplicate's */
    if ((flags ^ cur_flags) & O_SYNC) {
        errno = EINVAL;
        goto close_and_fail;
    }

    /* Set/unset the flags that fcntl is able to change */
    if (fcntl(newfd, F_SETFL, flags) == -1) {
        goto close_and_fail;
    }

    /* Truncate in the same cases in which open() would have truncated */
    if ((flags & O_TRUNC) || (flags & creat_excl) == creat_excl) {
        if (ftruncate(newfd, 0) == -1) {
            goto close_and_fail;
        }
    }

    return newfd;

close_and_fail:
    /* close() may overwrite errno, so keep the original failure reason */
    saved_errno = errno;
    close(newfd);
    errno = saved_errno;
    return -1;
}
182 
/*
 * Duplicate @fd, asking for close-on-exec on the duplicate.
 *
 * Uses fcntl(F_DUPFD_CLOEXEC) when the host defines it, otherwise dup()
 * followed by qemu_set_cloexec().
 *
 * Returns the new descriptor, or -1 if duplicating failed.
 */
int qemu_dup(int fd)
{
#ifdef F_DUPFD_CLOEXEC
    return fcntl(fd, F_DUPFD_CLOEXEC, 0);
#else
    int copy = dup(fd);

    if (copy == -1) {
        return -1;
    }
    qemu_set_cloexec(copy);
    return copy;
#endif
}
196 
/*
 * Parse an fdset id from @param.
 *
 * Thin wrapper: the parsing itself is done by qemu_parse_fd(), whose result
 * is returned unchanged.
 */
static int qemu_parse_fdset(const char *param)
{
    int id = qemu_parse_fd(param);

    return id;
}
201 
202 static void qemu_probe_lock_ops(void)
203 {
204     if (fcntl_op_setlk == -1) {
205 #ifdef F_OFD_SETLK
206         int fd;
207         int ret;
208         struct flock fl = {
209             .l_whence = SEEK_SET,
210             .l_start  = 0,
211             .l_len    = 0,
212             .l_type   = F_WRLCK,
213         };
214 
215         fd = open("/dev/null", O_RDWR);
216         if (fd < 0) {
217             fprintf(stderr,
218                     "Failed to open /dev/null for OFD lock probing: %s\n",
219                     strerror(errno));
220             fcntl_op_setlk = F_SETLK;
221             fcntl_op_getlk = F_GETLK;
222             return;
223         }
224         ret = fcntl(fd, F_OFD_GETLK, &fl);
225         close(fd);
226         if (!ret) {
227             fcntl_op_setlk = F_OFD_SETLK;
228             fcntl_op_getlk = F_OFD_GETLK;
229         } else {
230             fcntl_op_setlk = F_SETLK;
231             fcntl_op_getlk = F_GETLK;
232         }
233 #else
234         fcntl_op_setlk = F_SETLK;
235         fcntl_op_getlk = F_GETLK;
236 #endif
237     }
238 }
239 
/*
 * Report whether the locking helpers use the OFD fcntl() commands.
 *
 * Runs the probe first.  Always false when the host headers do not define
 * F_OFD_SETLK.
 */
bool qemu_has_ofd_lock(void)
{
    bool ofd = false;

    qemu_probe_lock_ops();
#ifdef F_OFD_SETLK
    ofd = (fcntl_op_setlk == F_OFD_SETLK);
#endif
    return ofd;
}
249 
250 static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type)
251 {
252     int ret;
253     struct flock fl = {
254         .l_whence = SEEK_SET,
255         .l_start  = start,
256         .l_len    = len,
257         .l_type   = fl_type,
258     };
259     qemu_probe_lock_ops();
260     do {
261         ret = fcntl(fd, fcntl_op_setlk, &fl);
262     } while (ret == -1 && errno == EINTR);
263     return ret == -1 ? -errno : 0;
264 }
265 
/*
 * Lock the byte range [@start, @start + @len) of @fd: with a write lock
 * when @exclusive is true, with a read lock otherwise.
 *
 * Returns 0 on success, or a negative errno value on failure.
 */
int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
{
    int kind = F_RDLCK;

    if (exclusive) {
        kind = F_WRLCK;
    }
    return qemu_lock_fcntl(fd, start, len, kind);
}
270 
/*
 * Release the lock on the byte range [@start, @start + @len) of @fd.
 *
 * Returns 0 on success, or a negative errno value on failure.
 */
int qemu_unlock_fd(int fd, int64_t start, int64_t len)
{
    const int kind = F_UNLCK;

    return qemu_lock_fcntl(fd, start, len, kind);
}
275 
276 int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
277 {
278     int ret;
279     struct flock fl = {
280         .l_whence = SEEK_SET,
281         .l_start  = start,
282         .l_len    = len,
283         .l_type   = exclusive ? F_WRLCK : F_RDLCK,
284     };
285     qemu_probe_lock_ops();
286     ret = fcntl(fd, fcntl_op_getlk, &fl);
287     if (ret == -1) {
288         return -errno;
289     } else {
290         return fl.l_type == F_UNLCK ? 0 : -EAGAIN;
291     }
292 }
293 #endif
294 
/*
 * open() @name with @flags and @mode, asking for close-on-exec.
 *
 * O_CLOEXEC is added to the flags when the host defines it; otherwise
 * qemu_set_cloexec() is called on the descriptor after a successful open().
 *
 * Returns the result of open().
 */
static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
{
#ifdef O_CLOEXEC
    return open(name, flags | O_CLOEXEC, mode);
#else
    int fd = open(name, flags, mode);

    if (fd >= 0) {
        qemu_set_cloexec(fd);
    }
    return fd;
#endif
}
308 
309 /*
310  * Opens a file with FD_CLOEXEC set
311  */
312 static int
313 qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
314 {
315     int ret;
316 
317 #ifndef _WIN32
318     const char *fdset_id_str;
319 
320     /* Attempt dup of fd from fd set */
321     if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
322         int64_t fdset_id;
323         int dupfd;
324 
325         fdset_id = qemu_parse_fdset(fdset_id_str);
326         if (fdset_id == -1) {
327             error_setg(errp, "Could not parse fdset %s", name);
328             errno = EINVAL;
329             return -1;
330         }
331 
332         dupfd = monitor_fdset_dup_fd_add(fdset_id, flags);
333         if (dupfd == -1) {
334             error_setg_errno(errp, errno, "Could not dup FD for %s flags %x",
335                              name, flags);
336             return -1;
337         }
338 
339         return dupfd;
340     }
341 #endif
342 
343     ret = qemu_open_cloexec(name, flags, mode);
344 
345     if (ret == -1) {
346         const char *action = flags & O_CREAT ? "create" : "open";
347 #ifdef O_DIRECT
348         /* Give more helpful error message for O_DIRECT */
349         if (errno == EINVAL && (flags & O_DIRECT)) {
350             ret = open(name, flags & ~O_DIRECT, mode);
351             if (ret != -1) {
352                 close(ret);
353                 error_setg(errp, "Could not %s '%s': "
354                            "filesystem does not support O_DIRECT",
355                            action, name);
356                 errno = EINVAL; /* restore first open()'s errno */
357                 return -1;
358             }
359         }
360 #endif /* O_DIRECT */
361         error_setg_errno(errp, errno, "Could not %s '%s'",
362                          action, name);
363     }
364 
365     return ret;
366 }
367 
368 
369 int qemu_open(const char *name, int flags, Error **errp)
370 {
371     assert(!(flags & O_CREAT));
372 
373     return qemu_open_internal(name, flags, 0, errp);
374 }
375 
376 
377 int qemu_create(const char *name, int flags, mode_t mode, Error **errp)
378 {
379     assert(!(flags & O_CREAT));
380 
381     return qemu_open_internal(name, flags | O_CREAT, mode, errp);
382 }
383 
384 
/*
 * open(2)-style variant of the open helpers, without an Error object.
 *
 * When @flags contains O_CREAT the permission bits are taken from the first
 * variadic argument; otherwise mode 0 is used.  If the open fails with
 * EINVAL while O_DIRECT was requested, a hint is reported through
 * error_report().
 *
 * Returns the result of qemu_open_internal().
 */
int qemu_open_old(const char *name, int flags, ...)
{
    mode_t mode = 0;
    int fd;

    if (flags & O_CREAT) {
        va_list args;

        va_start(args, flags);
        mode = va_arg(args, int);
        va_end(args);
    }

    fd = qemu_open_internal(name, flags, mode, NULL);

#ifdef O_DIRECT
    if (fd == -1 && (flags & O_DIRECT) && errno == EINVAL) {
        error_report("file system may not support O_DIRECT");
        errno = EINVAL; /* in case it was clobbered */
    }
#endif /* O_DIRECT */

    return fd;
}
408 
/*
 * Close @fd.
 *
 * If monitor_fdset_dup_fd_find() reports that @fd belongs to an fdset, the
 * descriptor is also dropped from the fdset bookkeeping, but only after
 * close() has succeeded.
 *
 * Returns the result of close().
 */
int qemu_close(int fd)
{
    /* Look the fd up before closing it */
    bool from_fdset = monitor_fdset_dup_fd_find(fd) != -1;
    int rc = close(fd);

    if (from_fdset && rc == 0) {
        monitor_fdset_dup_fd_remove(fd);
    }

    return rc;
}
428 
/*
 * Remove @name from the filesystem.  Names starting with /dev/fdset/ are
 * left alone and reported as successfully removed.
 *
 * Returns: zero on success.  On error, -1 is returned and errno is set
 * appropriately.
 */
int qemu_unlink(const char *name)
{
    if (!g_str_has_prefix(name, "/dev/fdset/")) {
        return unlink(name);
    }

    /* Nothing to delete for an fdset name */
    return 0;
}
443 
/*
 * A variant of write(2) which handles partial writes.
 *
 * Keeps calling write() until all @count bytes of @buf have been written
 * or a write fails; calls interrupted by a signal (EINTR) are retried.
 *
 * Returns the number of bytes transferred.  If that is fewer than @count,
 * errno holds the error of the write() call that ended the loop.
 *
 * This function does not work with non-blocking fds.
 * Either possibility with non-blocking fds is bad:
 *   - return a short write (then the name is wrong)
 *   - busy wait by adding (errno == EAGAIN) to the loop
 */
ssize_t qemu_write_full(int fd, const void *buf, size_t count)
{
    /* Step through the buffer as bytes: void * arithmetic is not ISO C */
    const char *cursor = buf;
    ssize_t total = 0;

    while (count > 0) {
        ssize_t written = write(fd, cursor, count);

        if (written < 0) {
            if (errno == EINTR) {
                continue;
            }
            break;
        }

        cursor += written;
        count -= written;
        total += written;
    }

    return total;
}
475 
/*
 * Create a socket with FD_CLOEXEC set.
 *
 * Where SOCK_CLOEXEC exists it is tried first.  Only if that attempt fails
 * with EINVAL is the socket created without the flag and
 * qemu_set_cloexec() applied afterwards.
 *
 * Returns the result of socket().
 */
int qemu_socket(int domain, int type, int protocol)
{
    int fd;

#ifdef SOCK_CLOEXEC
    fd = socket(domain, type | SOCK_CLOEXEC, protocol);
    if (!(fd == -1 && errno == EINVAL)) {
        /* Success, or a failure the fallback is not meant to cover */
        return fd;
    }
#endif
    fd = socket(domain, type, protocol);
    if (fd >= 0) {
        qemu_set_cloexec(fd);
    }

    return fd;
}
496 
/*
 * Accept a connection on @s and set FD_CLOEXEC on the new socket.
 *
 * With CONFIG_ACCEPT4, accept4(SOCK_CLOEXEC) is tried first.  Only if it
 * fails with ENOSYS is plain accept() used, followed by
 * qemu_set_cloexec().
 *
 * Returns the result of accept4()/accept().
 */
int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
{
    int conn;

#ifdef CONFIG_ACCEPT4
    conn = accept4(s, addr, addrlen, SOCK_CLOEXEC);
    if (!(conn == -1 && errno == ENOSYS)) {
        /* Success, or a failure the fallback is not meant to cover */
        return conn;
    }
#endif
    conn = accept(s, addr, addrlen);
    if (conn >= 0) {
        qemu_set_cloexec(conn);
    }

    return conn;
}
517 
518 void qemu_set_hw_version(const char *version)
519 {
520     hw_version = version;
521 }
522 
523 const char *qemu_hw_version(void)
524 {
525     return hw_version;
526 }
527 
528 void fips_set_state(bool requested)
529 {
530 #ifdef __linux__
531     if (requested) {
532         FILE *fds = fopen("/proc/sys/crypto/fips_enabled", "r");
533         if (fds != NULL) {
534             fips_enabled = (fgetc(fds) == '1');
535             fclose(fds);
536         }
537     }
538 #else
539     fips_enabled = false;
540 #endif /* __linux__ */
541 
542 #ifdef _FIPS_DEBUG
543     fprintf(stderr, "FIPS mode %s (requested %s)\n",
544             (fips_enabled ? "enabled" : "disabled"),
545             (requested ? "enabled" : "disabled"));
546 #endif
547 }
548 
549 bool fips_get_state(void)
550 {
551     return fips_enabled;
552 }
553 
#ifdef _WIN32
/*
 * atexit() handler registered by socket_init(): shuts Winsock down again.
 * The result of WSACleanup() is deliberately not looked at.
 */
static void socket_cleanup(void)
{
    (void)WSACleanup();
}
#endif
560 
/*
 * Initialise the host socket layer.
 *
 * On Windows this starts Winsock 2.2 and registers socket_cleanup() to run
 * at exit.  On other hosts there is nothing to do.
 *
 * Returns 0 on success, or -1 if WSAStartup() failed (the error code is
 * printed to stderr).
 */
int socket_init(void)
{
#ifdef _WIN32
    WSADATA data;
    int err;

    /*
     * WSAStartup() returns the error code directly.  WSAGetLastError()
     * must not be consulted when it fails, so report the return value.
     */
    err = WSAStartup(MAKEWORD(2, 2), &data);
    if (err != 0) {
        fprintf(stderr, "WSAStartup: %d\n", err);
        return -1;
    }
    atexit(socket_cleanup);
#endif
    return 0;
}
577 
578 
579 #ifndef CONFIG_IOVEC
/*
 * Helper function for iov_send_recv(): emulates readv()/writev() with one
 * read()/write() call per iovec element.
 *
 * @fd: descriptor to transfer on
 * @iov: array of @iov_cnt buffers, used in order
 * @iov_cnt: number of elements in @iov; must not be negative
 * @do_write: true to write the buffers out, false to read into them
 *
 * Zero-length elements are skipped.  The transfer stops at end of file and
 * after the first element that was only partly transferred, so the bytes
 * counted in the result always occupy the buffers contiguously from the
 * start.  Calls interrupted by a signal (EINTR) are retried.
 *
 * Returns the total number of bytes transferred, which may be less than
 * the sum of the element lengths.  Returns -1 with errno set if @iov_cnt is
 * negative or if an error occurred before any data was transferred.
 */
static ssize_t
readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
{
    ssize_t ret = 0;
    int i = 0;

    /* A negative count must not be turned into a huge unsigned one */
    if (iov_cnt < 0) {
        errno = EINVAL;
        return -1;
    }

    while (i < iov_cnt) {
        size_t len = iov[i].iov_len;
        ssize_t r;

        if (len == 0) {
            /* A zero-byte transfer would return 0 and look like EOF */
            i++;
            continue;
        }

        r = do_write
            ? write(fd, iov[i].iov_base, len)
            : read(fd, iov[i].iov_base, len);
        if (r > 0) {
            ret += r;
            if ((size_t)r < len) {
                /*
                 * Short transfer: moving on to the next element would
                 * leave a gap in this one (read) or skip its remaining
                 * bytes (write).
                 */
                break;
            }
        } else if (r == 0) {
            break;
        } else if (errno == EINTR) {
            continue;
        } else {
            /*
             * Some other error: only report it if no data was processed,
             * otherwise return what has been transferred so far.
             */
            if (ret == 0) {
                ret = -1;
            }
            break;
        }
        i++;
    }
    return ret;
}
608 
/*
 * Replacement readv() for hosts without CONFIG_IOVEC: reads into the
 * @iov_cnt buffers of @iov through readv_writev().
 *
 * Returns the result of readv_writev().
 */
ssize_t
readv(int fd, const struct iovec *iov, int iov_cnt)
{
    const bool do_write = false;

    return readv_writev(fd, iov, iov_cnt, do_write);
}
614 
/*
 * Replacement writev() for hosts without CONFIG_IOVEC: writes out the
 * @iov_cnt buffers of @iov through readv_writev().
 *
 * Returns the result of readv_writev().
 */
ssize_t
writev(int fd, const struct iovec *iov, int iov_cnt)
{
    const bool do_write = true;

    return readv_writev(fd, iov, iov_cnt, do_write);
}
620 #endif
621