xref: /qemu/util/oslib-posix.c (revision 814bb12a)
1 /*
2  * os-posix-lib.c
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2010 Red Hat, Inc.
6  *
7  * QEMU library functions on POSIX which are shared between QEMU and
8  * the QEMU tools.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  */
28 
29 #include "qemu/osdep.h"
30 #include <termios.h>
31 
32 #include <glib/gprintf.h>
33 
34 #include "sysemu/sysemu.h"
35 #include "trace.h"
36 #include "qapi/error.h"
37 #include "qemu/sockets.h"
38 #include <libgen.h>
39 #include <sys/signal.h>
40 #include "qemu/cutils.h"
41 
42 #ifdef CONFIG_LINUX
43 #include <sys/syscall.h>
44 #endif
45 
46 #ifdef __FreeBSD__
47 #include <sys/sysctl.h>
48 #include <libutil.h>
49 #endif
50 
51 #include "qemu/mmap-alloc.h"
52 
53 #ifdef CONFIG_DEBUG_STACK_USAGE
54 #include "qemu/error-report.h"
55 #endif
56 
/*
 * Return an integer identifying the calling thread.
 *
 * On Linux this is the value of the gettid system call; on every other
 * host the process ID from getpid() is returned instead.
 */
int qemu_get_thread_id(void)
{
#if !defined(__linux__)
    return getpid();
#else
    return syscall(SYS_gettid);
#endif
}
65 
/*
 * Thin wrapper around daemon(): both flags are forwarded unchanged and
 * the result of daemon() is handed back to the caller.
 */
int qemu_daemon(int nochdir, int noclose)
{
    int rc = daemon(nochdir, noclose);

    return rc;
}
70 
/*
 * Pass @ptr through unchanged unless it is NULL.
 *
 * For a NULL @ptr a "Failed to allocate memory" line carrying the
 * strerror() text of the current errno is written to stderr and the
 * process is terminated with abort().
 */
void *qemu_oom_check(void *ptr)
{
    if (ptr != NULL) {
        return ptr;
    }

    fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
    abort();
}
79 
/*
 * Try to allocate @size bytes aligned to at least @alignment bytes.
 *
 * Alignments smaller than a pointer are raised to sizeof(void *).
 * Three allocators are selected at compile time: posix_memalign(),
 * valloc() (which takes no alignment argument) or memalign().
 *
 * Returns the allocation or NULL on failure.  In the posix_memalign()
 * variant errno is set to the error code that call returned.  The
 * outcome is reported to the trace_qemu_memalign() trace point.
 */
void *qemu_try_memalign(size_t alignment, size_t size)
{
    void *mem;

    alignment = alignment < sizeof(void *) ? sizeof(void *) : alignment;

#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
    int rc = posix_memalign(&mem, alignment, size);

    if (rc) {
        /* posix_memalign() returns the error instead of setting errno */
        errno = rc;
        mem = NULL;
    }
#elif defined(CONFIG_BSD)
    mem = valloc(size);
#else
    mem = memalign(alignment, size);
#endif
    trace_qemu_memalign(alignment, size, mem);
    return mem;
}
103 
/*
 * Aligned allocation that never returns NULL: the result of
 * qemu_try_memalign() is run through qemu_oom_check(), which aborts the
 * process when the allocation failed.
 */
void *qemu_memalign(size_t alignment, size_t size)
{
    void *mem = qemu_try_memalign(alignment, size);

    return qemu_oom_check(mem);
}
108 
109 /* alloc shared memory pages */
110 void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
111 {
112     size_t align = QEMU_VMALLOC_ALIGN;
113     void *ptr = qemu_ram_mmap(-1, size, align, false);
114 
115     if (ptr == MAP_FAILED) {
116         return NULL;
117     }
118 
119     if (alignment) {
120         *alignment = align;
121     }
122 
123     trace_qemu_anon_ram_alloc(size, ptr);
124     return ptr;
125 }
126 
/*
 * Release @ptr with free() after reporting the pointer to the
 * trace_qemu_vfree() trace point.
 */
void qemu_vfree(void *ptr)
{
    trace_qemu_vfree(ptr);

    free(ptr);
}
132 
/*
 * Unmap the @size bytes at @ptr with qemu_ram_munmap() after reporting
 * the request to the trace_qemu_anon_ram_free() trace point.
 */
void qemu_anon_ram_free(void *ptr, size_t size)
{
    trace_qemu_anon_ram_free(ptr, size);

    qemu_ram_munmap(ptr, size);
}
138 
/*
 * Put @fd into blocking mode by clearing O_NONBLOCK in its file status
 * flags.
 *
 * Best effort: nothing is reported to the caller.  If the current flags
 * cannot be read (fcntl() returns -1, e.g. for an invalid descriptor)
 * the descriptor is left alone rather than deriving a new flag word
 * from the error value.
 */
void qemu_set_block(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    if (flags == -1) {
        return;
    }
    fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
}
145 
/*
 * Put @fd into non-blocking mode by setting O_NONBLOCK in its file
 * status flags.
 *
 * Best effort: nothing is reported to the caller.  If the current flags
 * cannot be read (fcntl() returns -1, e.g. for an invalid descriptor)
 * the descriptor is left alone rather than deriving a new flag word
 * from the error value.
 */
void qemu_set_nonblock(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    if (flags == -1) {
        return;
    }
    fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
152 
/*
 * Enable SO_REUSEADDR on socket @fd.
 *
 * A failing setsockopt() trips an assertion; otherwise its result
 * (0) is returned to the caller.
 */
int socket_set_fast_reuse(int fd)
{
    int enable = 1;
    int rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
                        (const char *)&enable, sizeof(enable));

    assert(rc == 0);
    return rc;
}
164 
/*
 * Mark @fd close-on-exec by setting FD_CLOEXEC in its descriptor flags.
 *
 * Best effort: nothing is reported to the caller.  If the current flags
 * cannot be read (fcntl() returns -1, e.g. for an invalid descriptor)
 * the descriptor is left alone rather than deriving a new flag word
 * from the error value.
 */
void qemu_set_cloexec(int fd)
{
    int flags = fcntl(fd, F_GETFD);

    if (flags == -1) {
        return;
    }
    fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
}
171 
/*
 * Create a pipe whose two descriptors both carry FD_CLOEXEC.
 *
 * When CONFIG_PIPE2 is defined, pipe2(O_CLOEXEC) is tried first and its
 * result is final unless it failed with ENOSYS.  Otherwise plain pipe()
 * is used and qemu_set_cloexec() is applied to each end afterwards.
 * Returns the result of the pipe call that was used.
 */
int qemu_pipe(int pipefd[2])
{
    int rc;

#ifdef CONFIG_PIPE2
    rc = pipe2(pipefd, O_CLOEXEC);
    if (!(rc == -1 && errno == ENOSYS)) {
        return rc;
    }
#endif
    rc = pipe(pipefd);
    if (rc != 0) {
        return rc;
    }

    qemu_set_cloexec(pipefd[0]);
    qemu_set_cloexec(pipefd[1]);
    return rc;
}
193 
/*
 * Set the access and modification times of @path.
 *
 * @times[0] is the access time and @times[1] the modification time; the
 * tv_nsec field of either may be UTIME_NOW or UTIME_OMIT.
 *
 * When CONFIG_UTIMENSAT is defined, utimensat() with AT_SYMLINK_NOFOLLOW
 * is tried first and its result is final unless it failed with ENOSYS.
 * Otherwise the request is emulated with utimes(), so nanoseconds are
 * truncated to microseconds and an omitted timestamp is rewritten from
 * stat() with whole-second precision.
 *
 * Returns 0 on success and -1 on failure.
 */
int qemu_utimens(const char *path, const struct timespec *times)
{
    struct timeval tv[2], tv_now;
    struct stat st;
    int i;
#ifdef CONFIG_UTIMENSAT
    int ret;

    ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
    if (ret != -1 || errno != ENOSYS) {
        return ret;
    }
#endif
    /* Fallback: use utimes() instead of utimensat() */

    /* Easy special cases: nothing to change, or both set to "now" */
    if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
        return 0;
    }
    if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
        return utimes(path, NULL);
    }

    /* Harder cases: gather whatever the loop below will need */
    if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
        if (gettimeofday(&tv_now, NULL) != 0) {
            return -1;
        }
    }
    if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
        /*
         * utimes() always writes both timestamps, so the omitted one is
         * written back from its current value.  Without a successful
         * stat() that value would come from an uninitialized buffer.
         */
        if (stat(path, &st) != 0) {
            return -1;
        }
    }

    for (i = 0; i < 2; i++) {
        if (times[i].tv_nsec == UTIME_NOW) {
            tv[i].tv_sec = tv_now.tv_sec;
            tv[i].tv_usec = tv_now.tv_usec;
        } else if (times[i].tv_nsec == UTIME_OMIT) {
            tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
            tv[i].tv_usec = 0;
        } else {
            tv[i].tv_sec = times[i].tv_sec;
            tv[i].tv_usec = times[i].tv_nsec / 1000;
        }
    }

    return utimes(path, &tv[0]);
}
240 
241 char *
242 qemu_get_local_state_pathname(const char *relative_pathname)
243 {
244     return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
245                            relative_pathname);
246 }
247 
/*
 * Switch the ECHO, ECHONL, ICANON and IEXTEN local modes of the
 * terminal behind @fd on (@echo true) or off (@echo false).
 *
 * Best effort: nothing is reported to the caller.  If the current
 * settings cannot be read (for instance because @fd is not a terminal)
 * nothing is changed, so no indeterminate termios contents are ever
 * modified or written back.
 */
void qemu_set_tty_echo(int fd, bool echo)
{
    const tcflag_t mask = ECHO | ECHONL | ICANON | IEXTEN;
    struct termios tty;

    if (tcgetattr(fd, &tty) != 0) {
        return;
    }

    if (echo) {
        tty.c_lflag |= mask;
    } else {
        tty.c_lflag &= ~mask;
    }

    tcsetattr(fd, TCSANOW, &tty);
}
262 
263 static char exec_dir[PATH_MAX];
264 
265 void qemu_init_exec_dir(const char *argv0)
266 {
267     char *dir;
268     char *p = NULL;
269     char buf[PATH_MAX];
270 
271     assert(!exec_dir[0]);
272 
273 #if defined(__linux__)
274     {
275         int len;
276         len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
277         if (len > 0) {
278             buf[len] = 0;
279             p = buf;
280         }
281     }
282 #elif defined(__FreeBSD__)
283     {
284         static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
285         size_t len = sizeof(buf) - 1;
286 
287         *buf = '\0';
288         if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
289             *buf) {
290             buf[sizeof(buf) - 1] = '\0';
291             p = buf;
292         }
293     }
294 #endif
295     /* If we don't have any way of figuring out the actual executable
296        location then try argv[0].  */
297     if (!p) {
298         if (!argv0) {
299             return;
300         }
301         p = realpath(argv0, buf);
302         if (!p) {
303             return;
304         }
305     }
306     dir = g_path_get_dirname(p);
307 
308     pstrcpy(exec_dir, sizeof(exec_dir), dir);
309 
310     g_free(dir);
311 }
312 
313 char *qemu_get_exec_dir(void)
314 {
315     return g_strdup(exec_dir);
316 }
317 
/* Jump target that os_mem_prealloc() arms with sigsetjmp(). */
static sigjmp_buf sigjump;

/*
 * SIGBUS handler: abandon the faulting access by jumping back to the
 * sigsetjmp() point stored in sigjump, which then sees the value 1.
 */
static void sigbus_handler(int signum)
{
    siglongjmp(sigjump, 1);
}
324 
325 void os_mem_prealloc(int fd, char *area, size_t memory, Error **errp)
326 {
327     int ret;
328     struct sigaction act, oldact;
329     sigset_t set, oldset;
330 
331     memset(&act, 0, sizeof(act));
332     act.sa_handler = &sigbus_handler;
333     act.sa_flags = 0;
334 
335     ret = sigaction(SIGBUS, &act, &oldact);
336     if (ret) {
337         error_setg_errno(errp, errno,
338             "os_mem_prealloc: failed to install signal handler");
339         return;
340     }
341 
342     /* unblock SIGBUS */
343     sigemptyset(&set);
344     sigaddset(&set, SIGBUS);
345     pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
346 
347     if (sigsetjmp(sigjump, 1)) {
348         error_setg(errp, "os_mem_prealloc: Insufficient free host memory "
349             "pages available to allocate guest RAM\n");
350     } else {
351         int i;
352         size_t hpagesize = qemu_fd_getpagesize(fd);
353         size_t numpages = DIV_ROUND_UP(memory, hpagesize);
354 
355         /* MAP_POPULATE silently ignores failures */
356         for (i = 0; i < numpages; i++) {
357             memset(area + (hpagesize * i), 0, 1);
358         }
359     }
360 
361     ret = sigaction(SIGBUS, &oldact, NULL);
362     if (ret) {
363         /* Terminate QEMU since it can't recover from error */
364         perror("os_mem_prealloc: failed to reinstall signal handler");
365         exit(1);
366     }
367     pthread_sigmask(SIG_SETMASK, &oldset, NULL);
368 }
369 
370 
/* Terminal settings of file descriptor 0 as saved by term_init(). */
static struct termios oldtty;

/* Apply the settings stored in oldtty to file descriptor 0 again. */
static void term_exit(void)
{
    tcsetattr(STDIN_FILENO, TCSANOW, &oldtty);
}
377 
378 static void term_init(void)
379 {
380     struct termios tty;
381 
382     tcgetattr(0, &tty);
383     oldtty = tty;
384 
385     tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP
386                           |INLCR|IGNCR|ICRNL|IXON);
387     tty.c_oflag |= OPOST;
388     tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN);
389     tty.c_cflag &= ~(CSIZE|PARENB);
390     tty.c_cflag |= CS8;
391     tty.c_cc[VMIN] = 1;
392     tty.c_cc[VTIME] = 0;
393 
394     tcsetattr(0, TCSANOW, &tty);
395 
396     atexit(term_exit);
397 }
398 
/*
 * Prompt with "password: " on stdout and read one line from file
 * descriptor 0 into @buf with terminal echo disabled.
 *
 * Input ends at the first carriage return or line feed, which is not
 * stored.  At most @buf_size - 1 bytes are kept; further bytes are read
 * and dropped.  @buf is always NUL-terminated.  Reads interrupted by
 * EAGAIN or EINTR are retried.
 *
 * Returns 0 when a line terminator was seen, -1 on end of input or on a
 * read error.
 */
int qemu_read_password(char *buf, int buf_size)
{
    uint8_t byte;
    int used = 0;
    int ret;

    printf("password: ");
    fflush(stdout);
    term_init();

    while (1) {
        ret = read(0, &byte, 1);
        if (ret == -1) {
            if (errno != EAGAIN && errno != EINTR) {
                break;
            }
            continue;
        }
        if (ret == 0) {
            /* end of input before a line terminator */
            ret = -1;
            break;
        }
        if (byte == '\n' || byte == '\r') {
            ret = 0;
            break;
        }
        if (used < (buf_size - 1)) {
            buf[used++] = byte;
        }
    }

    term_exit();
    buf[used] = '\0';
    printf("\n");
    return ret;
}
435 
436 
/*
 * Look up a name for process @pid.
 *
 * On FreeBSD this is a g_strdup() copy of the ki_comm field returned by
 * kinfo_getproc().  Elsewhere it is the content of /proc/<pid>/cmdline
 * as loaded by g_file_get_contents().
 *
 * Returns NULL when the information could not be obtained.
 */
char *qemu_get_pid_name(pid_t pid)
{
    char *result = NULL;

#if !defined(__FreeBSD__)
    /* Assume a system with reasonable procfs */
    size_t length;
    char *cmdline_path = g_strdup_printf("/proc/%d/cmdline", pid);

    g_file_get_contents(cmdline_path, &result, &length, NULL);
    g_free(cmdline_path);
#else
    /* BSDs don't have /proc, but they provide a nice substitute */
    struct kinfo_proc *info = kinfo_getproc(pid);

    if (info != NULL) {
        result = g_strdup(info->ki_comm);
        free(info);
    }
#endif

    return result;
}
461 
462 
463 pid_t qemu_fork(Error **errp)
464 {
465     sigset_t oldmask, newmask;
466     struct sigaction sig_action;
467     int saved_errno;
468     pid_t pid;
469 
470     /*
471      * Need to block signals now, so that child process can safely
472      * kill off caller's signal handlers without a race.
473      */
474     sigfillset(&newmask);
475     if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
476         error_setg_errno(errp, errno,
477                          "cannot block signals");
478         return -1;
479     }
480 
481     pid = fork();
482     saved_errno = errno;
483 
484     if (pid < 0) {
485         /* attempt to restore signal mask, but ignore failure, to
486          * avoid obscuring the fork failure */
487         (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
488         error_setg_errno(errp, saved_errno,
489                          "cannot fork child process");
490         errno = saved_errno;
491         return -1;
492     } else if (pid) {
493         /* parent process */
494 
495         /* Restore our original signal mask now that the child is
496          * safely running. Only documented failures are EFAULT (not
497          * possible, since we are using just-grabbed mask) or EINVAL
498          * (not possible, since we are using correct arguments).  */
499         (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
500     } else {
501         /* child process */
502         size_t i;
503 
504         /* Clear out all signal handlers from parent so nothing
505          * unexpected can happen in our child once we unblock
506          * signals */
507         sig_action.sa_handler = SIG_DFL;
508         sig_action.sa_flags = 0;
509         sigemptyset(&sig_action.sa_mask);
510 
511         for (i = 1; i < NSIG; i++) {
512             /* Only possible errors are EFAULT or EINVAL The former
513              * won't happen, the latter we expect, so no need to check
514              * return value */
515             (void)sigaction(i, &sig_action, NULL);
516         }
517 
518         /* Unmask all signals in child, since we've no idea what the
519          * caller's done with their signal mask and don't want to
520          * propagate that to children */
521         sigemptyset(&newmask);
522         if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
523             Error *local_err = NULL;
524             error_setg_errno(&local_err, errno,
525                              "cannot unblock signals");
526             error_report_err(local_err);
527             _exit(1);
528         }
529     }
530     return pid;
531 }
532 
/*
 * Allocate a stack with one inaccessible guard page.
 *
 * On entry *@sz is the requested usable size.  It is raised to the
 * _SC_THREAD_STACK_MIN value where that is available, rounded up to a
 * multiple of the page size, and increased by one page for the guard.
 * On return *@sz is the total size of the mapping, guard page included.
 *
 * The guard page is placed where the stack would overflow into: at the
 * lowest address for downward-growing stacks, at the highest for HPPA,
 * and in the middle for IA64 with its separate register stack.
 *
 * With CONFIG_DEBUG_STACK_USAGE the mapping past the first page is
 * filled with the pattern 0xdeadbeaf so usage can be measured later.
 *
 * Returns the base address of the mapping.  Aborts if the mapping or
 * the guard page protection cannot be set up.
 */
void *qemu_alloc_stack(size_t *sz)
{
    void *ptr, *guardpage;
#ifdef CONFIG_DEBUG_STACK_USAGE
    void *ptr2;
#endif
    size_t pagesz = getpagesize();
#ifdef _SC_THREAD_STACK_MIN
    /* avoid stacks smaller than _SC_THREAD_STACK_MIN */
    long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
    *sz = MAX(MAX(min_stack_sz, 0), *sz);
#endif
    /* adjust stack size to a multiple of the page size */
    *sz = ROUND_UP(*sz, pagesz);
    /* allocate one extra page for the guard page */
    *sz += pagesz;

    ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE,
               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED) {
        abort();
    }

#if defined(HOST_IA64)
    /*
     * separate register stack: put the guard in the middle, rounded
     * down to a page boundary as mprotect() requires
     */
    guardpage = ptr + (((*sz - pagesz) / 2) & ~(pagesz - 1));
#elif defined(HOST_HPPA)
    /* stack grows up */
    guardpage = ptr + *sz - pagesz;
#else
    /* stack grows down */
    guardpage = ptr;
#endif
    if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
        abort();
    }

#ifdef CONFIG_DEBUG_STACK_USAGE
    for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) {
        *(uint32_t *)ptr2 = 0xdeadbeaf;
    }
#endif

    return ptr;
}
578 
#ifdef CONFIG_DEBUG_STACK_USAGE
/* Largest stack usage reported so far by the current thread. */
static __thread unsigned int max_stack_usage;
#endif

/*
 * Unmap the @sz bytes of stack memory starting at @stack.
 *
 * With CONFIG_DEBUG_STACK_USAGE the area past the first page is first
 * scanned upwards for the first 32-bit word that no longer holds the
 * 0xdeadbeaf fill pattern; the distance from there to the end of the
 * mapping is the usage, which is reported whenever it exceeds the
 * previous maximum seen by this thread.
 */
void qemu_free_stack(void *stack, size_t sz)
{
#ifdef CONFIG_DEBUG_STACK_USAGE
    void *scan = stack + getpagesize();
    void *limit = stack + sz;
    unsigned int used;

    while (scan < limit && *(uint32_t *)scan == 0xdeadbeaf) {
        scan += sizeof(uint32_t);
    }

    used = sz - (uintptr_t) (scan - stack);
    if (used > max_stack_usage) {
        error_report("thread %d max stack usage increased from %u to %u",
                     qemu_get_thread_id(), max_stack_usage, used);
        max_stack_usage = used;
    }
#endif

    munmap(stack, sz);
}
605