1 /* Copyright (c) 2005-2008, Google Inc.
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * ---
31 * Author: Markus Gutschke, Carl Crous
32 */
33
34 #include "elfcore.h"
35 #if defined DUMPER
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
39
40 #include <elf.h>
41 #include <fcntl.h>
42 #include <limits.h>
43 #include <linux/sched.h>
44 #include <pthread.h>
45 #include <signal.h>
46 #include <stdbool.h>
47 #include <stdint.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <sys/poll.h>
51 #include <sys/prctl.h>
52 #include <sys/socket.h>
53 #include <sys/time.h>
54 #include <sys/uio.h>
55 #include <sys/wait.h>
56
57 #include "coredumper/coredumper.h"
58 #include "linux_syscall_support.h"
59 #include "linuxthreads.h"
60 #include "thread_lister.h"
61
62 #ifndef CLONE_UNTRACED
63 #define CLONE_UNTRACED 0x00800000
64 #endif
65
66 #ifndef AT_SYSINFO_EHDR
67 #define AT_SYSINFO_EHDR 33
68 #endif
69
70 #ifndef O_LARGEFILE
71 #if defined(__mips__)
72 #define O_LARGEFILE 0x2000
73 #elif defined(__ARM_ARCH_3__)
74 #define O_LARGEFILE 0400000
75 #elif defined(__PPC__) || defined(__ppc__)
76 #define O_LARGEFILE 0200000
77 #else
78 #define O_LARGEFILE 00100000 /* generic */
79 #endif
80 #endif
81
/* Data structures found in x86-32/64, ARM, and MIPS core dumps on Linux;
 * similar data structures are defined in /usr/include/{linux,asm}/... but
 * those headers conflict with the rest of the libc headers. So we cannot
 * include them here.
 *
 * NOTE(review): these are binary ABI layouts consumed by core-file readers
 * (gdb etc.); field order and sizes must not be changed.
 */

#if defined(__i386__) || defined(__x86_64__)
#if !defined(__x86_64__)
typedef struct fpregs { /* FPU registers */
  uint32_t cwd;
  uint32_t swd;
  uint32_t twd;
  uint32_t fip;
  uint32_t fcs;
  uint32_t foo;
  uint32_t fos;
  uint32_t st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
} fpregs;
typedef struct fpxregs { /* SSE registers */
#define FPREGS fpxregs
#else
typedef struct fpxregs { /* x86-64 stores FPU registers in SSE struct */
} fpxregs;
typedef struct fpregs { /* FPU registers */
#define FPREGS fpregs
#endif
  /* The fields below form a single struct body shared by both branches of
   * the #if above: it completes "struct fpxregs" on x86-32 and
   * "struct fpregs" on x86-64, as selected via the FPREGS macro.
   */
  uint16_t cwd;
  uint16_t swd;
  uint16_t twd;
  uint16_t fop;
  uint32_t fip;
  uint32_t fcs;
  uint32_t foo;
  uint32_t fos;
  uint32_t mxcsr;
  uint32_t mxcsr_mask;
  uint32_t st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
  uint32_t xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */
  uint32_t padding[24];
} FPREGS;
#undef FPREGS
#define regs i386_regs /* General purpose registers */
#elif defined(__ARM_ARCH_3__)
typedef struct fpxregs { /* No extended FPU registers on ARM */
} fpxregs;
typedef struct fpregs { /* FPU registers */
  struct fp_reg {
    unsigned int sign1 : 1;
    unsigned int unused : 15;
    unsigned int sign2 : 1;
    unsigned int exponent : 14;
    unsigned int j : 1;
    unsigned int mantissa1 : 31;
    unsigned int mantissa0 : 32;
  } fpregs[8];
  unsigned int fpsr : 32;
  unsigned int fpcr : 32;
  unsigned char ftype[8];
  unsigned int init_flag;
} fpregs;
#define regs arm_regs /* General purpose registers */
#elif defined(__mips__)
typedef struct fpxregs { /* No extended FPU registers on MIPS */
} fpxregs;
typedef struct fpregs {
  uint64_t fpuregs[32]; /* 32 FPU registers */
  uint32_t fcr31;       /* FPU control/status register */
  uint32_t fir;         /* FPU implementation register */
} fpregs;
#define regs mips_regs /* General purpose registers */
#endif
153
typedef struct elf_timeval { /* Time value with microsecond resolution */
  long tv_sec;               /* Seconds */
  long tv_usec;              /* Microseconds */
} elf_timeval;               /* Same shape as struct timeval, but uses "long"
                              * so the on-disk size tracks the word size */
158
typedef struct elf_siginfo { /* Information about signal (unused) */
  int32_t si_signo;          /* Signal number */
  int32_t si_code;           /* Extra code */
  int32_t si_errno;          /* Errno */
} elf_siginfo;
164
typedef struct prstatus {   /* Information about thread; includes CPU reg*/
                            /* Written verbatim as the payload of each
                             * NT_PRSTATUS note (see WriteThreadRegs), so
                             * the layout must match the kernel's. */
  elf_siginfo pr_info;      /* Info associated with signal */
  uint16_t pr_cursig;       /* Current signal */
  unsigned long pr_sigpend; /* Set of pending signals */
  unsigned long pr_sighold; /* Set of held signals */
  pid_t pr_pid;             /* Process ID */
  pid_t pr_ppid;            /* Parent's process ID */
  pid_t pr_pgrp;            /* Group ID */
  pid_t pr_sid;             /* Session ID */
  elf_timeval pr_utime;     /* User time */
  elf_timeval pr_stime;     /* System time */
  elf_timeval pr_cutime;    /* Cumulative user time */
  elf_timeval pr_cstime;    /* Cumulative system time */
  regs pr_reg;              /* CPU registers */
  uint32_t pr_fpvalid;      /* True if math co-processor being used */
} prstatus;
181
typedef struct prpsinfo { /* Information about process */
                          /* NOTE(review): presumably the NT_PRPSINFO note
                           * payload; mirrors the kernel's elf_prpsinfo. */
  unsigned char pr_state; /* Numeric process state */
  char pr_sname;          /* Char for pr_state */
  unsigned char pr_zomb;  /* Zombie */
  signed char pr_nice;    /* Nice val */
  unsigned long pr_flag;  /* Flags */
#if defined(__x86_64__) || defined(__mips__)
  uint32_t pr_uid;        /* User ID */
  uint32_t pr_gid;        /* Group ID */
#else
  uint16_t pr_uid;        /* User ID */
  uint16_t pr_gid;        /* Group ID */
#endif
  pid_t pr_pid;           /* Process ID */
  pid_t pr_ppid;          /* Parent's process ID */
  pid_t pr_pgrp;          /* Group ID */
  pid_t pr_sid;           /* Session ID */
  char pr_fname[16];      /* Filename of executable */
  char pr_psargs[80];     /* Initial part of arg list */
} prpsinfo;
202
typedef struct core_user {     /* Ptrace returns this data for thread state */
#ifndef __mips__               /* MIPS has no equivalent "user" structure */
  struct regs regs;            /* CPU registers */
  unsigned long fpvalid;       /* True if math co-processor being used */
#if defined(__i386__) || defined(__x86_64__)
  struct fpregs fpregs;        /* FPU registers */
#endif
  unsigned long tsize;         /* Text segment size in pages */
  unsigned long dsize;         /* Data segment size in pages */
  unsigned long ssize;         /* Stack segment size in pages */
  unsigned long start_code;    /* Starting virtual address of text */
  unsigned long start_stack;   /* Starting virtual address of stack area */
  unsigned long signal;        /* Signal that caused the core dump */
  unsigned long reserved;      /* No longer used */
  struct regs *regs_ptr;       /* Used by gdb to help find the CPU registers*/
#if defined(__i386__) || defined(__x86_64__)
  struct fpregs *fpregs_ptr;   /* Pointer to FPU registers */
#endif
  unsigned long magic;         /* Magic for old A.OUT core files */
  char comm[32];               /* User command that was responsible */
  unsigned long debugreg[8];   /* Debug registers */
#if defined(__i386__) || defined(__x86_64__)
  unsigned long error_code;    /* CPU error code or 0 */
  unsigned long fault_address; /* CR3 or 0 */
#elif defined(__ARM_ARCH_3__)
  struct fpregs fpregs;        /* FPU registers */
  struct fpregs *fpregs_ptr;   /* Pointer to FPU registers */
#endif
#endif
} core_user;
233
234 #if __WORDSIZE == 64
235 #define ELF_CLASS ELFCLASS64
236 #define Ehdr Elf64_Ehdr
237 #define Phdr Elf64_Phdr
238 #define Shdr Elf64_Shdr
239 #define Nhdr Elf64_Nhdr
240 #define auxv_t Elf64_auxv_t
241 #else
242 #define ELF_CLASS ELFCLASS32
243 #define Ehdr Elf32_Ehdr
244 #define Phdr Elf32_Phdr
245 #define Shdr Elf32_Shdr
246 #define Nhdr Elf32_Nhdr
247 #define auxv_t Elf32_auxv_t
248 #endif
249
250 #if defined(__x86_64__)
251 #define ELF_ARCH EM_X86_64
252 #elif defined(__i386__)
253 #define ELF_ARCH EM_386
254 #elif defined(__ARM_ARCH_3__)
255 #define ELF_ARCH EM_ARM
256 #elif defined(__mips__)
257 #define ELF_ARCH EM_MIPS
258 #endif
259
260 /* Wrap a class around system calls, in order to give us access to
261 * a private copy of errno. This only works in C++, but it has the
262 * advantage of not needing nested functions, which are a non-standard
263 * language extension.
264 */
265 #ifdef __cplusplus
266 namespace {
267 class SysCalls {
268 public:
269 #define SYS_CPLUSPLUS
270 #define SYS_ERRNO my_errno
271 #define SYS_INLINE inline
272 #define SYS_PREFIX -1
273 #undef SYS_LINUX_SYSCALL_SUPPORT_H
274 #include "linux_syscall_support.h"
SysCalls()275 SysCalls() : my_errno(0) {}
276 int my_errno;
277 };
278 } // namespace
279 #define ERRNO sys.my_errno
280 #else
281 #define ERRNO my_errno
282 #endif
283
284 /* Re-runs fn until it doesn't cause EINTR
285 */
286 #define NO_INTR(fn) \
287 do { \
288 } while ((fn) < 0 && errno == EINTR)
289 #define MY_NO_INTR(fn) \
290 do { \
291 } while ((fn) < 0 && ERRNO == EINTR)
292
/* Replacement memcpy. GCC's __builtin_memcpy causes cores?
 * Yes I know the return value isn't the same as memcpy().
 * Copies "len" bytes from "src" to "dest"; regions must not overlap.
 */
static void my_memcpy(void *dest, const void *src, size_t len) {
  unsigned char *to = (unsigned char *)dest;
  const unsigned char *from = (const unsigned char *)src;
  size_t n = 0;
  while (n < len) {
    to[n] = from[n];
    n++;
  }
}
302
/* Wrapper for read() which is guaranteed to never return EINTR.
 * On failure, the error code is reported through "*errno_"; the syscall
 * itself goes through a private errno so the global errno of the process
 * being dumped is never clobbered. Returns the byte count from read(),
 * or 0 immediately when "bytes" is 0.
 */
static ssize_t c_read(int f, void *buf, size_t bytes, int *errno_) {
  /* scope */ {
    /* Define a private copy of syscall macros, which does not modify the
     * global copy of errno.
     */
#ifdef __cplusplus
#define sys0_read sys.read
    SysCalls sys;
#else
    int my_errno;
#define SYS_ERRNO my_errno
#define SYS_INLINE inline
#define SYS_PREFIX 0
#undef SYS_LINUX_SYSCALL_SUPPORT_H
#include "linux_syscall_support.h"
#endif

    if (bytes > 0) {
      ssize_t rc;
      /* Retry the raw syscall for as long as it is interrupted by signals */
      MY_NO_INTR(rc = sys0_read(f, buf, bytes));
      if (rc < 0) {
        *errno_ = ERRNO;
      }
      return rc;
    }
    return 0;
  }
}
333
/* Wrapper for write() which is guaranteed to never return EINTR nor
 * short writes. Loops until all "bytes" have been written, the kernel
 * reports 0 bytes written, or an error occurs. On error, the error code
 * is reported through "*errno_" and a negative value is returned;
 * otherwise returns the number of bytes actually written.
 */
static ssize_t c_write(int f, const void *void_buf, size_t bytes, int *errno_) {
  /* scope */ {
    /* Define a private copy of syscall macros, which does not modify the
     * global copy of errno.
     */
#ifdef __cplusplus
#define sys0_write sys.write
    SysCalls sys;
#else
    int my_errno;
#define SYS_ERRNO my_errno
#define SYS_INLINE inline
#undef SYS_LINUX_SYSCALL_SUPPORT_H
#define SYS_PREFIX 0
#include "linux_syscall_support.h"
#endif

    const unsigned char *buf = (const unsigned char *)void_buf;
    size_t len = bytes;
    while (len > 0) {
      ssize_t rc;
      MY_NO_INTR(rc = sys0_write(f, buf, len));
      if (rc < 0) {
        *errno_ = ERRNO;
        return rc;
      } else if (rc == 0) {
        /* Kernel refuses to make progress; report what was written so far */
        break;
      }
      buf += rc;
      len -= rc;
    }
    return bytes - len;
  }
}
371
/* The simple synchronous writer is only used when outputting to a pipe
 * instead of a file. In that case, we do not enforce a pre-determined
 * maximum output size, so this "is_done" callback always reports "not
 * done" (0). "f" is unused.
 */
static int SimpleDone(void *f) { return 0; }
377
/* Simple synchronous writer function used by CreateElfCore() when writing
 * directly to a pipe. "f" points to the output file descriptor. Errors
 * are reported through the global errno via c_write().
 */
static ssize_t SimpleWriter(void *f, const void *void_buf, size_t bytes) {
  return c_write(*(int *)f, void_buf, bytes, &errno);
}
384
/* State shared by the limit/pipe writer callbacks below. */
struct WriterFds {
  size_t max_length;  /* Number of bytes that may still be written */
  int write_fd;       /* Write end feeding the compressor pipeline */
  int compressed_fd;  /* Read end returning compressed data */
  int out_fd;         /* Final output file */
};
391
392 /* Checks whether the maximum number of allowed bytes has been written
393 * to the output file already.
394 */
PipeDone(void * f)395 static int PipeDone(void *f) {
396 struct WriterFds *fds = (struct WriterFds *)f;
397 return fds->max_length == 0;
398 }
399
400 /* Writer function that writes directly to a file and honors size limits.
401 */
LimitWriter(void * f,const void * void_buf,size_t bytes)402 static ssize_t LimitWriter(void *f, const void *void_buf, size_t bytes) {
403 struct WriterFds *fds = (struct WriterFds *)f;
404 ssize_t rc;
405 if (bytes > fds->max_length) {
406 bytes = fds->max_length;
407 }
408 rc = c_write(fds->out_fd, void_buf, bytes, &errno);
409 if (rc > 0) {
410 fds->max_length -= rc;
411 }
412 return rc;
413 }
414
415 /* Writer function that can handle writing to one end of a compression
416 * pipeline, reading from the other end of the pipe as compressed data
417 * becomes available, and finally outputting it to a file.
418 */
PipeWriter(void * f,const void * void_buf,size_t bytes)419 static ssize_t PipeWriter(void *f, const void *void_buf, size_t bytes) {
420 const unsigned char *buf = (const unsigned char *)void_buf;
421 struct WriterFds *fds = (struct WriterFds *)f;
422 size_t len = bytes;
423 while (fds->max_length > 0 && len > 0) {
424 ssize_t rc;
425 struct kernel_pollfd pfd[2] = {{fds->compressed_fd, POLLIN, 0}, {fds->write_fd, POLLOUT, 0}};
426 int nfds = sys_poll(pfd, 2, -1);
427
428 if (nfds < 0) {
429 /* Abort on fatal unexpected I/O errors. */
430 break;
431 }
432
433 if (nfds > 0 && (pfd[0].revents & POLLIN)) {
434 /* Some compressed data has become available. Copy to output file. */
435 char scratch[4096];
436 for (;;) {
437 size_t l = sizeof(scratch);
438 if (l > fds->max_length) {
439 l = fds->max_length;
440 }
441
442 /* The following line is needed on MIPS. Not sure why. Compiler bug? */
443 errno = -1;
444
445 NO_INTR(rc = sys_read(fds->compressed_fd, scratch, l));
446 if (rc < 0) {
447 /* The file handle is set to be non-blocking, so we loop until
448 * read() returns -1.
449 */
450 if (errno == EAGAIN) {
451 break;
452 }
453 return -1;
454 } else if (rc == 0) {
455 fds->max_length = 0;
456 break;
457 }
458 rc = c_write(fds->out_fd, scratch, rc, &errno);
459 if (rc <= 0) {
460 return -1;
461 }
462 fds->max_length -= rc;
463 }
464 nfds--;
465 }
466 if (nfds > 0 && (pfd[1].revents & POLLOUT)) {
467 /* The compressor has consumed all previous data and is ready to
468 * receive more.
469 */
470 NO_INTR(rc = sys_write(fds->write_fd, buf, len));
471 if (rc < 0 && errno != EAGAIN) {
472 return -1;
473 }
474 buf += rc;
475 len -= rc;
476 }
477 }
478 return bytes - len;
479 }
480
481 /* Flush the remaining data (if any) from the pipe.
482 */
FlushPipe(struct WriterFds * fds)483 static int FlushPipe(struct WriterFds *fds) {
484 long flags;
485 NO_INTR(flags = sys_fcntl(fds->compressed_fd, F_GETFL, 0));
486 NO_INTR(sys_fcntl(fds->compressed_fd, F_SETFL, flags & ~O_NONBLOCK));
487 while (fds->max_length > 0) {
488 char scratch[4096];
489 size_t l = sizeof(scratch);
490 ssize_t rc;
491 if (l > fds->max_length) {
492 l = fds->max_length;
493 }
494 if (l > 0) {
495 NO_INTR(rc = sys_read(fds->compressed_fd, scratch, l));
496 if (rc < 0) {
497 return -1;
498 } else if (rc == 0) {
499 break;
500 }
501 if (c_write(fds->out_fd, scratch, rc, &errno) != rc) {
502 return -1;
503 }
504 fds->max_length -= rc;
505 }
506 }
507 return 0;
508 }
509
/* Buffered reader state for parsing /proc files one character at a time. */
struct io {
  int fd;                   /* Underlying file descriptor */
  unsigned char *data, *end; /* Next unread byte / one past last valid byte */
  unsigned char buf[4096];  /* Read-ahead buffer */
};
515
516 /* Reads one character from the "io" file. This function has the same
517 * semantics as fgetc(), but we cannot call any library functions at this
518 * time.
519 */
GetChar(struct io * io)520 static int GetChar(struct io *io) {
521 unsigned char *ptr = io->data;
522 if (ptr == io->end) {
523 /* Even though we are parsing one character at a time, read in larger
524 * chunks.
525 */
526 ssize_t n = c_read(io->fd, io->buf, sizeof(io->buf), &errno);
527 if (n <= 0) {
528 if (n == 0) errno = 0;
529 return -1;
530 }
531 ptr = &io->buf[0];
532 io->end = &io->buf[n];
533 }
534 io->data = ptr + 1;
535 return *ptr;
536 }
537
/* Place the hex number read from "io" into "*hex". The first non-hex
 * character is returned (or -1 in the case of end-of-file). If read_first
 * then we start by getting the next char, otherwise we start from
 * init_char.
 */
static int GetHexHelper(struct io *io, size_t *hex, bool read_first, int init_char) {
  int ch = read_first ? GetChar(io) : init_char;
  *hex = 0;
  for (;;) {
    size_t digit;
    if (ch >= '0' && ch <= '9') {
      digit = ch - '0';
    } else if ((ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')) {
      /* Low nibble of 'A'..'F'/'a'..'f' is 1..6; add 9 to get 10..15 */
      digit = (ch & 0xF) + 9;
    } else {
      break;
    }
    *hex = (*hex << 4) | digit;
    ch = GetChar(io);
  }
  return ch;
}
553
/* Reads a hex number from "io", starting with the next character. */
static int GetHex(struct io *io, size_t *hex) { return GetHexHelper(io, hex, true, 0); }
555
/* Reads a hex number from "io", treating "init_char" as the first digit. */
static int GetHexWithInitChar(struct io *io, size_t *hex, int init_char) {
  return GetHexHelper(io, hex, false, init_char);
}
559
/* Computes the amount of leading zeros in a memory region.
 * "loopback" is a pipe (read end [0], write end [1]) used to probe pages
 * safely. Returns the number of leading all-zero bytes, rounded down to
 * a multiple of "pagesize" (assumed to be a power of two).
 */
static size_t LeadingZeros(int *loopback, void *mem, size_t len, size_t pagesize) {
  char buf[pagesize]; /* one page of scratch space (VLA) */
  size_t count;

  char *ptr = buf;
  for (count = 0; count < len;) {
    /* Read a page by going through the pipe. Assume that we can write at
     * least one page without blocking.
     *
     * "Normal" kernels do not require this hack. But some of the security
     * patches (e.g. grsec) can be configured to disallow read access of
     * executable pages. So, directly scanning the memory range would
     * result in a segmentation fault.
     *
     * If we cannot access a page, we assume that it was all zeros.
     */
    if ((count % pagesize) == 0) {
      if (c_write(loopback[1], (char *)mem + count, pagesize, &errno) < 0 ||
          c_read(loopback[0], buf, pagesize, &errno) < 0) {
        /* Inaccessible page: skip it, counting it as zeros. */
        count += pagesize;
        continue;
      } else {
        ptr = buf;
      }
    }
    if (*ptr++) {
      /* First non-zero byte found */
      break;
    }
    count++;
  }
  /* Round down to whole pages */
  return count & ~(pagesize - 1);
}
594
/* Dynamically determines the byte sex of the system. Returns non-zero
 * for big-endian machines.
 */
static inline int sex() {
  const int probe = 1;
  /* On a little-endian machine the low-order byte of 1 is stored first. */
  const char first_byte = *(const char *)&probe;
  return first_byte == 0;
}
602
/* Emits the per-thread ELF notes for thread "pid" via "writer":
 * NT_PRSTATUS (process status + integer registers), NT_FPREGSET (FPU
 * registers) and, on x86-32 only, NT_PRXFPREG (SSE registers). Each note
 * name is written as an 8-byte, NUL-padded field. "prstatus" is updated
 * in place with the pid and register set before being written.
 * Returns 0 on success, -1 if any write fails.
 */
static int WriteThreadRegs(void *handle, ssize_t (*writer)(void *, const void *, size_t), prstatus *prstatus, pid_t pid,
                           regs *regs, fpregs *fpregs, fpxregs *fpxregs) {
  Nhdr nhdr;
  memset(&nhdr, 0, sizeof(Nhdr));
  /* Process status and integer registers */
  nhdr.n_namesz = 5; /* "CORE" + NUL */
  nhdr.n_descsz = sizeof(struct prstatus);
  nhdr.n_type = NT_PRSTATUS;
  prstatus->pr_pid = pid;
  prstatus->pr_reg = *regs;
  if (writer(handle, &nhdr, sizeof(Nhdr)) != sizeof(Nhdr) || writer(handle, "CORE\0\0\0\0", 8) != 8 ||
      writer(handle, prstatus, sizeof(struct prstatus)) != sizeof(struct prstatus)) {
    return -1;
  }

  /* FPU registers */
  nhdr.n_descsz = sizeof(struct fpregs);
  nhdr.n_type = NT_FPREGSET;
  if (writer(handle, &nhdr, sizeof(Nhdr)) != sizeof(Nhdr) || writer(handle, "CORE\0\0\0\0", 8) != 8 ||
      writer(handle, fpregs, sizeof(struct fpregs)) != sizeof(struct fpregs)) {
    return -1;
  }

  /* SSE registers */
#if defined(__i386__) && !defined(__x86_64__)
  /* Linux on x86-64 stores all FPU registers in the SSE structure */
  if (fpxregs) {
    nhdr.n_namesz = 8; /* "LINUX" + NUL, padded to 8 bytes */
    nhdr.n_descsz = sizeof(struct fpxregs);
    nhdr.n_type = NT_PRXFPREG;
    if (writer(handle, &nhdr, sizeof(Nhdr)) != sizeof(Nhdr) || writer(handle, "LINUX\000\000", 8) != 8 ||
        writer(handle, fpxregs, sizeof(struct fpxregs)) != sizeof(struct fpxregs)) {
      return -1;
    }
  }
#endif
  return 0;
}
641
642 /* Read /proc/self/auxv (if it exists), count number of entries.
643 * Since we are already reading all entries, it is convenient
644 * to also return the address of VDSO Elf header, if AT_SYSINFO_EHDR
645 * is present.
646 */
CountAUXV(size_t * pnum_auxv,size_t * pvdso_ehdr)647 static void CountAUXV(size_t *pnum_auxv, size_t *pvdso_ehdr) {
648 int fd;
649 auxv_t auxv;
650 size_t num_auxv = 0, vdso_ehdr = 0;
651 NO_INTR(fd = sys_open("/proc/self/auxv", O_RDONLY, 0));
652 if (fd >= 0) {
653 ssize_t nread;
654 do {
655 NO_INTR(nread = sys_read(fd, &auxv, sizeof(auxv_t)));
656 if (sizeof(auxv_t) != nread) break;
657 num_auxv++;
658 if (auxv.a_type == AT_SYSINFO_EHDR) {
659 vdso_ehdr = (size_t)auxv.a_un.a_val;
660 }
661 } while (auxv.a_type != AT_NULL);
662 }
663 NO_INTR(sys_close(fd));
664 *pnum_auxv = num_auxv;
665 *pvdso_ehdr = vdso_ehdr;
666 return;
667 }
668
/* Verify that alleged vdso and its internals are sane (properly
 * aligned, within readable memory etc). "start" and "end" delimit the
 * mapping that is expected to contain the whole vdso image. Returns
 * "ehdr" when all checks pass, or NULL if anything looks suspicious.
 */
static Ehdr *SanitizeVDSO(Ehdr *ehdr, size_t start, size_t end) {
  const size_t ehdr_address = (size_t)ehdr; /* ehdr alias to avoid casts */
  int i;
  Phdr *phdr;
  if (!ehdr_address || (ehdr_address & (sizeof(size_t) - 1))) {
    /* Not properly aligned. Something goofy is going on. */
    return NULL;
  }
  if (end <= ehdr_address + sizeof(Ehdr)) {
    /* Entire Ehdr is not "covered" by expected region. */
    return NULL;
  }
  if (ehdr->e_phoff & (sizeof(size_t) - 1)) {
    /* Phdr not properly aligned */
    return NULL;
  }
  phdr = (Phdr *)(ehdr_address + ehdr->e_phoff);
  if ((size_t)phdr <= start || end <= (size_t)(phdr + ehdr->e_phnum)) {
    /* Phdr[] is not "covered" by expected region. */
    return NULL;
  }
  if (phdr[0].p_type != PT_LOAD || phdr[0].p_vaddr != start || phdr[0].p_vaddr + phdr[0].p_memsz >= end) {
    /* Something goofy. The first Phdr must be the single PT_LOAD that
     * starts exactly at the mapping start and fits inside it. */
    return NULL;
  }
  for (i = 1; i < ehdr->e_phnum; i++) {
    if (phdr[i].p_type == PT_LOAD) {
      /* Only a single PT_LOAD at index 0 is expected */
      return NULL;
    }
    if (phdr[i].p_vaddr & (sizeof(size_t) - 1)) {
      /* Phdr data not properly aligned */
      return NULL;
    }
    if (phdr[i].p_vaddr <= start || end <= phdr[i].p_vaddr + phdr[i].p_filesz) {
      /* The data isn't in the expected range */
      return NULL;
    }
  }
  return ehdr;
}
713
714 /* This function is invoked from a separate process. It has access to a
715 * copy-on-write copy of the parents address space, and all crucial
716 * information about the parent has been computed by the caller.
717 */
CreateElfCore(void * handle,ssize_t (* writer)(void *,const void *,size_t),int (* is_done)(void *),prpsinfo * prpsinfo,core_user * user,prstatus * prstatus,int num_threads,pid_t * pids,regs * regs,fpregs * fpregs,fpxregs * fpxregs,size_t pagesize,size_t prioritize_max_length,pid_t main_pid,const struct CoredumperNote * extra_notes,int extra_notes_count)718 static int CreateElfCore(void *handle, ssize_t (*writer)(void *, const void *, size_t), int (*is_done)(void *),
719 prpsinfo *prpsinfo, core_user *user, prstatus *prstatus, int num_threads, pid_t *pids,
720 regs *regs, fpregs *fpregs, fpxregs *fpxregs, size_t pagesize, size_t prioritize_max_length,
721 pid_t main_pid, const struct CoredumperNote *extra_notes, int extra_notes_count) {
722 /* Count the number of mappings in "/proc/self/maps". We are guaranteed
723 * that this number is not going to change while this function executes.
724 */
725 int rc = -1, num_mappings = 0;
726 struct io io;
727 int loopback[2] = {-1, -1};
728 size_t num_auxv;
729 union {
730 Ehdr *ehdr;
731 size_t address;
732 } vdso;
733
734 if (sys_pipe(loopback) < 0) goto done;
735
736 io.data = io.end = 0;
737 NO_INTR(io.fd = sys_open("/proc/self/maps", O_RDONLY, 0));
738 if (io.fd >= 0) {
739 int i, ch;
740 while ((ch = GetChar(&io)) >= 0) {
741 num_mappings += (ch == '\n');
742 }
743 if (errno != 0) {
744 read_error:
745 NO_INTR(sys_close(io.fd));
746 goto done;
747 }
748 NO_INTR(sys_close(io.fd));
749
750 CountAUXV(&num_auxv, &vdso.address);
751 /* Read all mappings. This requires re-opening "/proc/self/maps" */
752 /* scope */ {
753 static const int PF_MASK = 0x00000007;
754 struct {
755 size_t start_address, end_address, offset, write_size;
756 int flags;
757 } mappings[num_mappings];
758 io.data = io.end = 0;
759 NO_INTR(io.fd = sys_open("/proc/self/smaps", O_RDONLY, 0));
760 if (io.fd >= 0) {
761 size_t note_align;
762 size_t num_extra_phdrs = 0;
763
764 if ((ch = GetChar(&io)) < 0) {
765 goto read_error;
766 }
767
768 /* Parse entries of the form:
769 * "^[0-9A-F]*-[0-9A-F]* [r-][w-][x-][p-] [0-9A-F]*.*$"
770 * At the start of each iteration, ch contains the first character.
771 */
772 for (i = 0; i < num_mappings;) {
773 static const char *const dev_zero = "/dev/zero";
774 const char *dev = dev_zero;
775 int j, is_device, is_anonymous;
776 int dontdump = 0;
777 int has_anonymous_pages = 0;
778 size_t zeros;
779
780 memset(&mappings[i], 0, sizeof(mappings[i]));
781
782 /* Read start and end addresses */
783 if (GetHexWithInitChar(&io, &mappings[i].start_address, ch) != '-' ||
784 GetHex(&io, &mappings[i].end_address) != ' ')
785 goto read_error;
786
787 /* Read flags */
788 while ((ch = GetChar(&io)) != ' ') {
789 if (ch < 0) goto read_error;
790 mappings[i].flags = (mappings[i].flags << 1) | (ch != '-');
791 }
792
793 /* Read offset */
794 if ((ch = GetHex(&io, &mappings[i].offset)) != ' ') goto read_error;
795
796 /* Skip over device numbers, and inode number */
797 for (j = 0; j < 2; j++) {
798 while (ch == ' ') {
799 ch = GetChar(&io);
800 }
801 while (ch != ' ' && ch != '\n') {
802 if (ch < 0) goto read_error;
803 ch = GetChar(&io);
804 }
805 while (ch == ' ') {
806 ch = GetChar(&io);
807 }
808 if (ch < 0) goto read_error;
809 }
810
811 /* Check whether this is a mapping for a device */
812 is_anonymous = (ch == '\n' || ch == '[');
813 while (*dev && ch == *dev) {
814 ch = GetChar(&io);
815 dev++;
816 }
817 is_device = dev >= dev_zero + 5 && ((ch != '\n' && ch != ' ') || *dev != '\000');
818
819 /* Skip until end of line */
820 while (ch != '\n') {
821 if (ch < 0) goto read_error;
822 ch = GetChar(&io);
823 }
824
825 /*
826 * Parse extra information from smaps.
827 * Each time through this loop we read one full line.
828 * Stop when we've parsed one memory segment's complete description.
829 * Afterwards ch will contain the first character of the next
830 * description, or EOF.
831 */
832 while (1) {
833 ch = GetChar(&io);
834 if (ch < 1 || (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f'))
835 /* EOF, or new memory segment description start */
836 break;
837
838 switch (ch) {
839 /* Anonymous: */
840 case 'A': {
841 const char *str = "Anonymous:";
842 while (*str && ch == *str) {
843 ch = GetChar(&io);
844 ++str;
845 }
846
847 if (*str == '\0') {
848 /* Check if there is at least one anonymous page */
849
850 /* Skip spaces until we reach the page number */
851 while (ch == ' ') ch = GetChar(&io);
852
853 /* Make sure we reached a digit */
854 if (ch < '0' || ch > '9') goto read_error;
855
856 has_anonymous_pages = ch != '0';
857 }
858 break;
859 }
860
861 /* VmFlags: */
862 case 'V': {
863 const char *str = "VmFlags:";
864 while (*str && ch == *str) {
865 ch = GetChar(&io);
866 ++str;
867 }
868
869 if (*str == '\0') {
870 /* Check the flags for "don't dump" (dd) */
871 while (ch == ' ') {
872 /* skip space before the flag */
873 while (ch == ' ') ch = GetChar(&io);
874
875 /* check if the flag is "dd" */
876 if (ch == 'd') {
877 ch = GetChar(&io);
878 if (ch == 'd') {
879 dontdump = true;
880 break;
881 }
882 }
883
884 /* skip any remaining flag characters */
885 while (ch >= 'a' && ch <= 'z') ch = GetChar(&io);
886 }
887 }
888 break;
889 }
890
891 default:
892 break;
893 }
894
895 /* Skip until end of line */
896 while (ch != '\n') {
897 if (ch < 0) goto read_error;
898
899 ch = GetChar(&io);
900 }
901 }
902
903 /* Drop the private/shared bit. This makes the flags compatible with
904 * the ELF access bits
905 */
906 mappings[i].flags = (mappings[i].flags >> 1) & PF_MASK;
907
908 /* Skip leading zeroed pages (as found in the stack segment) */
909 if ((mappings[i].flags & PF_R) && !is_device) {
910 zeros = LeadingZeros(loopback, (void *)mappings[i].start_address,
911 mappings[i].end_address - mappings[i].start_address, pagesize);
912 mappings[i].start_address += zeros;
913 }
914
915 /* Write segment content if the don't dump flag is not set, and one
916 * or more of the following is true:
917 * - the segment is anonymous
918 * - the segment is writable
919 * - the segment has anonymous pages
920 */
921 if (!dontdump && (is_anonymous || has_anonymous_pages || (mappings[i].flags & PF_W) != 0)) {
922 mappings[i].write_size = mappings[i].end_address - mappings[i].start_address;
923 }
924
925 /* Remove mapping, if it was not readable, or completely zero
926 * anyway. The former is usually the case of stack guard pages, and
927 * the latter occasionally happens for unused memory.
928 * Also, be careful not to touch mapped devices.
929 */
930 if ((mappings[i].flags & PF_R) == 0 || mappings[i].start_address == mappings[i].end_address || is_device) {
931 num_mappings--;
932 } else {
933 i++;
934 }
935 }
936 NO_INTR(sys_close(io.fd));
937
938 if (vdso.address) {
939 /* Sanity checks. */
940 for (i = 0; i < num_mappings; i++) {
941 size_t start = mappings[i].start_address;
942 size_t end = mappings[i].end_address;
943 if ((mappings[i].flags & PF_R) && start <= vdso.address && vdso.address < end) {
944 vdso.ehdr = SanitizeVDSO(vdso.ehdr, start, end);
945 break;
946 }
947 }
948 if (i == num_mappings) {
949 /* Did not find a mapping "covering" vdso.
950 * Something goofy is going on; will not dump it.
951 */
952 vdso.address = 0;
953 }
954 }
955
956 /* Write out the ELF header */
957 /* scope */ {
958 Ehdr ehdr;
959 if (vdso.address) {
960 /* We are going to add Phdrs that "belong" to vdso.
961 * This isn't strictly necessary, but matches what kernel code
962 * in fs/binfmt_elf.c does on platforms that have vdso.
963 */
964 Phdr *vdso_phdr = (Phdr *)(vdso.address + vdso.ehdr->e_phoff);
965 for (i = 0; i < vdso.ehdr->e_phnum; i++) {
966 if (vdso_phdr[i].p_type == PT_LOAD) {
967 /* This will be written as "normal" mapping */
968 } else {
969 num_extra_phdrs++;
970 }
971 }
972 }
973 memset(&ehdr, 0, sizeof(Ehdr));
974 ehdr.e_ident[0] = ELFMAG0;
975 ehdr.e_ident[1] = ELFMAG1;
976 ehdr.e_ident[2] = ELFMAG2;
977 ehdr.e_ident[3] = ELFMAG3;
978 ehdr.e_ident[4] = ELF_CLASS;
979 ehdr.e_ident[5] = sex() ? ELFDATA2MSB : ELFDATA2LSB;
980 ehdr.e_ident[6] = EV_CURRENT;
981 ehdr.e_type = ET_CORE;
982 ehdr.e_machine = ELF_ARCH;
983 ehdr.e_version = EV_CURRENT;
984 ehdr.e_phoff = sizeof(Ehdr);
985 ehdr.e_ehsize = sizeof(Ehdr);
986 ehdr.e_phentsize = sizeof(Phdr);
987 ehdr.e_phnum = num_mappings + num_extra_phdrs + 1;
988 ehdr.e_shentsize = sizeof(Shdr);
989 if (writer(handle, &ehdr, sizeof(Ehdr)) != sizeof(Ehdr)) {
990 goto done;
991 }
992 }
993
994 /* Write program headers, starting with the PT_NOTE entry */
995 /* scope */ {
996 Phdr phdr;
997 size_t offset = sizeof(Ehdr) + (num_mappings + num_extra_phdrs + 1) * sizeof(Phdr);
998 size_t filesz =
999 sizeof(Nhdr) + 8 + sizeof(struct prpsinfo) + (user ? sizeof(Nhdr) + 8 + sizeof(struct core_user) : 0) +
1000 num_threads * (+sizeof(Nhdr) + 8 + sizeof(struct prstatus) + sizeof(Nhdr) + 8 + sizeof(struct fpregs));
1001 #if defined(__i386__) && !defined(__x86_64__)
1002 if (fpxregs) {
1003 filesz += num_threads * (sizeof(Nhdr) + 8 + sizeof(struct fpxregs));
1004 }
1005 #endif
1006 /* Calculate how much space the extra notes will take. */
1007 for (i = 0; i < extra_notes_count; i++) {
1008 size_t name_size;
1009 name_size = strlen(extra_notes[i].name) + 1;
1010 filesz += sizeof(Nhdr) + name_size + extra_notes[i].description_size;
1011 /* Note names and descriptions are 4 byte aligned. */
1012 if (name_size % 4 != 0) {
1013 filesz += 4 - name_size % 4;
1014 }
1015 if (extra_notes[i].description_size % 4 != 0) {
1016 filesz += 4 - extra_notes[i].description_size % 4;
1017 }
1018 }
1019 /* Space for auxv note */
1020 if (num_auxv) {
1021 filesz += 8 + sizeof(Nhdr) + num_auxv * sizeof(auxv_t);
1022 }
1023
1024 memset(&phdr, 0, sizeof(Phdr));
1025 phdr.p_type = PT_NOTE;
1026 phdr.p_offset = offset;
1027 phdr.p_filesz = filesz;
1028 if (writer(handle, &phdr, sizeof(Phdr)) != sizeof(Phdr)) {
1029 goto done;
1030 }
1031
1032 /* Now follow with program headers for each of the memory segments */
1033 phdr.p_type = PT_LOAD;
1034 phdr.p_align = pagesize;
1035 phdr.p_paddr = 0;
1036 note_align = phdr.p_align - ((offset + filesz) % phdr.p_align);
1037 if (note_align == phdr.p_align) note_align = 0;
1038 offset += note_align;
1039
1040 /* If the option is set, remove the largest memory sections first
1041 * when limiting the size of the core dump.
1042 * If prioritize_max_length is zero, the prioritization option wasn't
1043 * set. If max_length was set to zero, we wouldn't have gotten this
1044 * far.
1045 */
1046 if (prioritize_max_length > 0) {
1047 /* Calculates the size of the vdso sections which are added to the
1048 * end of the file. These need to be preserved in order for the
1049 * core file to be useful.
1050 */
1051 size_t vdso_size = 0;
1052 if (vdso.address) {
1053 Phdr *vdso_phdr = (Phdr *)(vdso.address + vdso.ehdr->e_phoff);
1054 for (i = 0; i < vdso.ehdr->e_phnum; i++) {
1055 Phdr *p = vdso_phdr + i;
1056 if (p->p_type != PT_LOAD) {
1057 vdso_size += p->p_filesz;
1058 }
1059 }
1060 }
1061
1062 /* Loops while there isn't enough space for all the mappings. Each
1063 * iteration, the largest mapping will be reduced in size.
1064 */
1065 for (;;) {
1066 int largest = -1;
1067 size_t total_core_size = offset + filesz + vdso_size;
1068 /* Get the largest and total size of the core dump. */
1069 for (i = 0; i < num_mappings; i++) {
1070 total_core_size += mappings[i].write_size;
1071 if (largest < 0 || mappings[largest].write_size < mappings[i].write_size) {
1072 largest = i;
1073 }
1074 }
1075 /* If the total size of all the maps is more than our file size,
1076 * we must reduce the size of the largest map.
1077 */
1078 if (largest >= 0 && total_core_size > prioritize_max_length) {
1079 size_t space_needed = total_core_size - prioritize_max_length;
1080 /* If there is no more space to free in the mappings, we must
1081 * stop. The size limit will be preserved since if the
1082 * prioritized limiting is enabled, the limited writer will be
1083 * used.
1084 */
1085 if (mappings[largest].write_size > 0) {
1086 if (space_needed > mappings[largest].write_size) {
1087 mappings[largest].write_size = 0;
1088 continue;
1089 } else {
1090 mappings[largest].write_size -= space_needed;
1091 }
1092 }
1093 }
1094 break;
1095 }
1096 }
1097
1098 for (i = 0; i < num_mappings; i++) {
1099 offset += filesz;
1100 filesz = mappings[i].end_address - mappings[i].start_address;
1101 phdr.p_offset = offset;
1102 phdr.p_vaddr = mappings[i].start_address;
1103 phdr.p_memsz = filesz;
1104
1105 filesz = mappings[i].write_size;
1106 phdr.p_filesz = filesz;
1107 phdr.p_flags = mappings[i].flags & PF_MASK;
1108 if (writer(handle, &phdr, sizeof(Phdr)) != sizeof(Phdr)) {
1109 goto done;
1110 }
1111 }
1112 if (vdso.ehdr) {
1113 Phdr *vdso_phdr = (Phdr *)(vdso.address + vdso.ehdr->e_phoff);
1114 for (i = 0; i < vdso.ehdr->e_phnum; i++) {
1115 if (vdso_phdr[i].p_type != PT_LOAD) {
1116 memcpy(&phdr, vdso_phdr + i, sizeof(Phdr));
1117 offset += filesz;
1118 filesz = phdr.p_filesz;
1119 phdr.p_offset = offset;
1120 phdr.p_paddr = 0; /* match other core phdrs */
1121 if (writer(handle, &phdr, sizeof(Phdr)) != sizeof(Phdr)) {
1122 goto done;
1123 }
1124 }
1125 }
1126 }
1127 }
1128 /* Write note section */
1129 /* scope */ {
1130 Nhdr nhdr;
1131 memset(&nhdr, 0, sizeof(Nhdr));
1132 nhdr.n_namesz = 5;
1133 nhdr.n_descsz = sizeof(struct prpsinfo);
1134 nhdr.n_type = NT_PRPSINFO;
1135 if (writer(handle, &nhdr, sizeof(Nhdr)) != sizeof(Nhdr) || writer(handle, "CORE\0\0\0\0", 8) != 8 ||
1136 writer(handle, prpsinfo, sizeof(struct prpsinfo)) != sizeof(struct prpsinfo)) {
1137 goto done;
1138 }
1139 if (user) {
1140 nhdr.n_descsz = sizeof(struct core_user);
1141 nhdr.n_type = NT_PRXREG;
1142 if (writer(handle, &nhdr, sizeof(Nhdr)) != sizeof(Nhdr) || writer(handle, "CORE\0\0\0\0", 8) != 8 ||
1143 writer(handle, user, sizeof(struct core_user)) != sizeof(struct core_user)) {
1144 goto done;
1145 }
1146 }
1147 if (num_auxv) {
1148 /* Dump entire auxv[] array as NT_AUXV note, to match what
1149 * kernel code in fs/binfmt_elf.c does.
1150 * Without this, gdb can't unwind through vdso on i686.
1151 */
1152 int fd, i;
1153 NO_INTR(fd = sys_open("/proc/self/auxv", O_RDONLY, 0));
1154 if (fd == -1) {
1155 goto done;
1156 }
1157 nhdr.n_descsz = num_auxv * sizeof(auxv_t);
1158 nhdr.n_type = NT_AUXV;
1159 if (writer(handle, &nhdr, sizeof(Nhdr)) != sizeof(Nhdr) || writer(handle, "CORE\0\0\0\0", 8) != 8) {
1160 NO_INTR(sys_close(fd));
1161 goto done;
1162 }
1163 for (i = 0; i < num_auxv; ++i) {
1164 ssize_t nread;
1165 auxv_t auxv;
1166 NO_INTR(nread = sys_read(fd, &auxv, sizeof(auxv_t)));
1167 if (nread != sizeof(auxv_t)) {
1168 NO_INTR(sys_close(fd));
1169 goto done;
1170 }
1171 if (writer(handle, &auxv, sizeof(auxv_t)) != sizeof(auxv_t)) {
1172 NO_INTR(sys_close(fd));
1173 goto done;
1174 }
1175 }
1176 }
1177 /* The order of threads in the output matters to gdb:
1178 * it assumes that the first one is the one that crashed.
1179 * Make it easier for the end-user to find crashing thread
1180 * by dumping it first.
1181 */
1182 for (i = num_threads; i-- > 0;) {
1183 if (pids[i] == main_pid) {
1184 if (WriteThreadRegs(handle, writer, prstatus, pids[i], regs + i, fpregs + i, fpxregs + i)) {
1185 goto done;
1186 }
1187 break;
1188 }
1189 }
1190 for (i = num_threads; i-- > 0;) {
1191 if (pids[i] != main_pid) {
1192 if (WriteThreadRegs(handle, writer, prstatus, pids[i], regs + i, fpregs + i, fpxregs + i)) {
1193 goto done;
1194 }
1195 }
1196 }
1197
1198 /* Write user provided notes */
1199 for (i = 0; i < extra_notes_count; i++) {
1200 size_t name_align = 0, description_align = 0;
1201 const char scratch[3] = {0, 0, 0};
1202 nhdr.n_namesz = strlen(extra_notes[i].name) + 1;
1203 nhdr.n_descsz = extra_notes[i].description_size;
1204 nhdr.n_type = extra_notes[i].type;
1205 /* Get the alignment for the data */
1206 if (nhdr.n_namesz % 4 != 0) {
1207 name_align = 4 - nhdr.n_namesz % 4;
1208 }
1209 if (nhdr.n_descsz % 4 != 0) {
1210 description_align = 4 - nhdr.n_descsz % 4;
1211 }
1212 /* Write the note header */
1213 if (writer(handle, &nhdr, sizeof(Nhdr)) != sizeof(Nhdr)) {
1214 goto done;
1215 }
1216 /* Write the note name and padding */
1217 if (writer(handle, extra_notes[i].name, nhdr.n_namesz) != nhdr.n_namesz) {
1218 goto done;
1219 }
1220 if (writer(handle, scratch, name_align) != name_align) {
1221 goto done;
1222 }
1223 /* Write the note description and padding */
1224 if (writer(handle, extra_notes[i].description, nhdr.n_descsz) != nhdr.n_descsz) {
1225 goto done;
1226 }
1227 if (writer(handle, scratch, description_align) != description_align) {
1228 goto done;
1229 }
1230 }
1231 }
1232
1233 /* Align all following segments to multiples of page size */
1234 if (note_align) {
1235 char scratch[note_align];
1236 memset(scratch, 0, note_align * sizeof(char));
1237 if (writer(handle, scratch, note_align * sizeof(char)) != note_align * sizeof(char)) {
1238 goto done;
1239 }
1240 }
1241
1242 /* Write all memory segments */
1243 for (i = 0; i < num_mappings; i++) {
1244 if (mappings[i].write_size > 0 &&
1245 writer(handle, (void *)mappings[i].start_address, mappings[i].write_size) != mappings[i].write_size) {
1246 goto done;
1247 }
1248 }
1249 if (vdso.address) {
1250 /* Finally write the contents of Phdrs that "belong" to vdso. */
1251 Phdr *vdso_phdr = (Phdr *)(vdso.address + vdso.ehdr->e_phoff);
1252 for (i = 0; i < vdso.ehdr->e_phnum; i++) {
1253 Phdr *p = vdso_phdr + i;
1254 if (p->p_type == PT_LOAD) {
1255 /* This segment has already been dumped, because it is one of
1256 * the mappings[].
1257 */
1258 } else if (writer(handle, (void *)p->p_vaddr, p->p_filesz) != p->p_filesz) {
1259 goto done;
1260 }
1261 }
1262 }
1263 rc = 0;
1264 }
1265 }
1266 }
1267
1268 done:
1269 if (is_done(handle)) {
1270 rc = 0;
1271 }
1272
1273 if (loopback[0] >= 0) NO_INTR(sys_close(loopback[0]));
1274 if (loopback[1] >= 0) NO_INTR(sys_close(loopback[1]));
1275 return rc;
1276 }
1277
/* Argument bundle handed to CreatePipelineChild() through sys_clone().
 * The child shares the parent's address space (CLONE_VM), so the parent
 * must keep this structure alive until the child has exec()'d or exited.
 */
struct CreateArgs {
  int *fds;         /* pipe the child uses to report the chosen compressor */
  int openmax;      /* upper bound on open fds; used to close everything else */
  const char *PATH; /* $PATH value searched for the compressor executable */
  const struct CoredumperCompressor *compressors; /* candidate list to try */
  int zip_in[2];    /* pipe feeding uncompressed core data to the compressor */
  int zip_out[2];   /* pipe carrying compressed output back to the caller */
};
1286
/* Entry point of the clone()'d child that sets up the compressor process.
 * Because the child shares the parent's address space, it must not clobber
 * the global errno; it therefore uses a private copy of the syscall macros.
 * It rewires stdin/stdout to the compression pipes, sends stderr to
 * "/dev/null", closes every other descriptor, and then attempts to exec()
 * each candidate compressor in turn.  Before each attempt it reports the
 * candidate to the parent over fds[1]; that descriptor is close-on-exec,
 * so a successful exec() closes the pipe and the parent's reader sees EOF
 * with the last-reported candidate being the one that is now running.
 */
static int CreatePipelineChild(void *void_arg) {
  /* scope */ {
    /* Define a private copy of syscall macros, which does not modify the
     * global copy of errno.
     */
#ifdef __cplusplus
#define sys0_close sys.close
#define sys0_dup sys.dup
#define sys0_dup2 sys.dup2
#define sys0_execve sys.execve
#define sys0_open sys.open
#define sys0_fcntl sys.fcntl
    SysCalls sys;
#else
    int my_errno;
#define SYS_ERRNO my_errno
#define SYS_INLINE inline
#define SYS_PREFIX 0
#undef SYS_LINUX_SYSCALL_SUPPORT_H
#include "linux_syscall_support.h"
#endif

    struct CreateArgs *args = (struct CreateArgs *)void_arg;
    int i;

    /* Use pipe to tell parent about the compressor that we chose.
     * Make sure the file handle for the write-end of the pipe is
     * bigger than 2, so that it does not interfere with the
     * stdin/stdout/stderr file handles which must be 0-2.
     */
    MY_NO_INTR(sys0_close(args->fds[0]));
    while (args->fds[1] <= 2) {
      /* dup() returns the lowest free fd, so repeating until > 2 vacates
       * the stdio slots; the duplicates are cleaned up by the close loop
       * below.
       */
      MY_NO_INTR(args->fds[1] = sys0_dup(args->fds[1]));
    }
    sys0_fcntl(args->fds[1], F_SETFD, FD_CLOEXEC);

    /* Move the filehandles for stdin/stdout/stderr, so that they
     * map to handles 0-2. stdin/stdout are connected to pipes, and
     * stderr points to "/dev/null".
     */
    while (args->zip_in[0] <= 2) {
      MY_NO_INTR(args->zip_in[0] = sys0_dup(args->zip_in[0]));
    }
    while (args->zip_out[1] <= 2) {
      MY_NO_INTR(args->zip_out[1] = sys0_dup(args->zip_out[1]));
    }
    MY_NO_INTR(sys0_dup2(args->zip_in[0], 0));
    MY_NO_INTR(sys0_dup2(args->zip_out[1], 1));
    MY_NO_INTR(sys0_close(2));
    MY_NO_INTR(sys0_dup2(sys0_open("/dev/null", O_WRONLY, 0), 2));

    /* Close all handles other than stdin/stdout/stderr and the
     * pipe to the parent. This also takes care of all the filehandles
     * that we temporarily created by calling sys_dup().
     */
    for (i = 3; i < args->openmax; i++)
      if (i != args->fds[1]) MY_NO_INTR(sys0_close(i));

    /* An entry with an empty compressor string terminates the candidate
     * list (it means "uncompressed is acceptable"); a NULL string ends the
     * table outright.
     */
    while (args->compressors->compressor != NULL && *args->compressors->compressor) {
      extern char **environ;

      const char *compressor = args->compressors->compressor;
      const char *const *cmd_args = args->compressors->args;

      /* Try next compressor description. If the compressor exists,
       * the fds[1] file handle will get closed on exec(). The
       * parent detects this, and eventually updates
       * selected_compressor with the compressor that is now running.
       *
       * Please note, the caller does not need to call wait() for any
       * compressor that gets launched, because our parent process is
       * going to die soon; thus, the compressor will be reaped by "init".
       */
      c_write(args->fds[1], &args->compressors, sizeof(&args->compressors), &ERRNO);
      if (strchr(compressor, '/')) {
        /* Absolute or relative path precedes name of executable */
        sys0_execve(compressor, cmd_args, (const char *const *)environ);
      } else {
        /* Search for executable along PATH variable */
        const char *ptr = args->PATH;
        if (ptr != NULL) {
          for (;;) {
            const char *end = ptr;
            while (*end && *end != ':') end++;
            if (ptr == end) {
              /* Found current directory in PATH */
              sys0_execve(compressor, cmd_args, (const char *const *)environ);
            } else {
              /* Compute new file name: "<dir>/<compressor>\0" */
              char executable[strlen(compressor) + (end - ptr) + 2];
              memcpy(executable, ptr, end - ptr);
              executable[end - ptr] = '/';
              strcpy(executable + (end - ptr + 1), compressor);
              sys0_execve(executable, cmd_args, (const char *const *)environ);
            }
            if (!*end) break;
            ptr = end + 1;
          }
        }
      }
      /* exec() failed for this candidate; advance to the next one. */
      ++args->compressors;
    }

    /* No suitable compressor found. Tell parent about it. */
    c_write(args->fds[1], &args->compressors, sizeof(&args->compressors), &ERRNO);
    MY_NO_INTR(sys0_close(args->fds[1]));
    sys__exit(0);
    return 0; /* unreachable; keeps the clone() entry-point signature happy */
  }
}
1397
1398 /* Create a pipeline for sending the core file from the child process back to
1399 * the caller. Optionally include a compressor program in the loop. The
1400 * "compressors" variable will be updated to point to the compressor that was
1401 * actually used.
1402 */
/* Sets up fds[] as the read/write ends of the (optionally compressing)
 * pipeline and points *compressors at the entry that was actually used.
 * Returns 0 on success, -1 with errno set on failure.  The fail0/fail1/
 * fail2 labels below sit inside conditional blocks and are also entered
 * via goto from later failure paths; each closes the descriptors it owns
 * while preserving errno, then chains to the previous cleanup stage.
 */
static int CreatePipeline(int *fds, int openmax, const char *PATH, const struct CoredumperCompressor **compressors) {
  int saved_errno1 = 0;

  /* Create a pipe for communicating between processes */
  if (sys_pipe(fds) < 0) return -1;

  /* Find a suitable compressor program, if necessary */
  if (*compressors != NULL && (*compressors)->compressor != NULL) {
    char stack[4096];
    struct CreateArgs args;
    pid_t comp_pid;

    args.fds = fds;
    args.openmax = openmax;
    args.PATH = PATH;
    args.compressors = *compressors;

    if (sys_pipe(args.zip_in) < 0) {
    fail0 : {
      int saved_errno = errno;
      NO_INTR(sys_close(fds[0]));
      NO_INTR(sys_close(fds[1]));
      errno = saved_errno;
      return -1;
    }
    } else if (sys_pipe(args.zip_out) < 0) {
    fail1 : {
      int saved_errno = errno;
      NO_INTR(sys_close(args.zip_in[0]));
      NO_INTR(sys_close(args.zip_in[1]));
      errno = saved_errno;
      goto fail0;
    }
    }

    /* We use clone() here, instead of the more common fork(). This ensures
     * that the WriteCoreDump() code path never results in making a COW
     * instance of the processes' address space. This increases the likelihood
     * that we can dump core files even if we are using a lot of memory and
     * the kernel disallows overcommitting of memory.
     * After cloning, both the parent and the child share the same instance
     * of errno. We must make sure that at least one of these processes
     * (in our case, the child) uses modified syscall macros that update
     * a local copy of errno, instead.
     */
    comp_pid =
        sys_clone(CreatePipelineChild, stack + sizeof(stack) - 16, CLONE_VM | CLONE_UNTRACED | SIGCHLD, &args, 0, 0, 0);
    if (comp_pid < 0) {
      int clone_errno = errno;
      NO_INTR(sys_close(args.zip_out[0]));
      NO_INTR(sys_close(args.zip_out[1]));
      errno = clone_errno;
      goto fail1;
    }

    /* Close write-end of pipe, and read from read-end until child closes
     * its reference to the pipe.  Each read overwrites *compressors, so
     * after EOF it holds the last candidate the child reported, i.e. the
     * one whose exec() succeeded (or the list terminator on failure).
     */
    NO_INTR(sys_close(fds[1]));
    *compressors = NULL;
    while (c_read(fds[0], compressors, sizeof(*compressors), &errno)) {
    }
    NO_INTR(sys_close(fds[0]));

    /* Fail if either the child never even executed (unlikely), or
     * did not find any compressor that could be executed.
     */
    if (*compressors == NULL || (*compressors)->compressor == NULL) {
      saved_errno1 = errno;
      NO_INTR(sys_close(args.zip_out[0]));
      NO_INTR(sys_close(args.zip_out[1]));
      errno = saved_errno1;
    fail2 : {
      int saved_errno2 = errno;
      NO_INTR(sys_close(args.zip_in[0]));
      NO_INTR(sys_close(args.zip_in[1]));
      errno = saved_errno2;
      return -1;
    }
    }

    if (*(*compressors)->compressor) {
      /* Found a good compressor program, which is now connected to
       * zip_in/zip_out.
       */
      fds[0] = args.zip_out[0];
      fds[1] = args.zip_in[1];
      NO_INTR(sys_close(args.zip_in[0]));
      NO_INTR(sys_close(args.zip_out[1]));
    } else {
      /* No suitable compressor found, but the caller allowed
       * uncompressed core files. So, just close unneeded file handles,
       * and reap the child's exit code.
       */
      int status;
      fds[0] = -1;
      fds[1] = -1;
      NO_INTR(sys_close(args.zip_in[0]));
      NO_INTR(sys_close(args.zip_out[0]));
      NO_INTR(sys_close(args.zip_in[1]));
      NO_INTR(sys_close(args.zip_out[1]));
      while (sys_waitpid(comp_pid, &status, 0) < 0) {
        if (errno != EINTR) {
          goto fail2;
        }
      }
    }
  }
  return 0;
}
1513
1514 /* If this code is being built without support for multi-threaded core files,
 * some of our basic assumptions are not quite right. Most noticeably, the
1516 * fake thread lister ends up calling InternalGetCoreDump() from the main
1517 * (i.e. only) thread in the application, which cannot be ptrace()'d at this
1518 * time. This prevents us from retrieving CPU registers.
1519 *
1520 * We work around this problem by delaying the call to ptrace() until we
1521 * have forked. We also need to double-fork here, in order to make sure that
1522 * the core writer process can get reaped by "init" after it reaches EOF.
1523 */
/* Retrieves the parent's CPU/FPU(/SSE) registers via ptrace and then
 * double-forks so "init" can reap the core-writer.  With THREADS defined
 * this is a no-op that reports success (registers are gathered elsewhere).
 * Returns 1 on success (in the surviving grandchild), 0 on failure.
 */
static inline int GetParentRegs(void *frame, regs *cpu, fpregs *fp, fpxregs *fpx, int *hasSSE) {
#ifdef THREADS
  return 1;
#else
  int rc = 0;
  char scratch[4096];
  pid_t pid = getppid();
  /* scratch is pre-filled with 0xFF before every ptrace read so that a
   * short fill is at least deterministic; it is assumed to be larger than
   * any of the register structures copied out of it.
   */
  if (sys_ptrace(PTRACE_ATTACH, pid, (void *)0, (void *)0) == 0 && waitpid(pid, (void *)0, __WALL) >= 0) {
    memset(scratch, 0xFF, sizeof(scratch));
    if (sys_ptrace(PTRACE_GETREGS, pid, scratch, scratch) == 0) {
      memcpy(cpu, scratch, sizeof(struct regs));
      /* Replace the ptrace'd register snapshot's frame with the caller's
       * known-good frame (PC/SP at the point of the dump request).
       */
      SET_FRAME(*(Frame *)frame, *cpu);
      memset(scratch, 0xFF, sizeof(scratch));
      if (sys_ptrace(PTRACE_GETFPREGS, pid, scratch, scratch) == 0) {
        memcpy(fp, scratch, sizeof(struct fpregs));
        memset(scratch, 0xFF, sizeof(scratch));
#if defined(__i386__) && !defined(__x86_64__)
        /* Linux on x86-64 stores all FPU registers in the SSE structure */
        if (sys_ptrace(PTRACE_GETFPXREGS, pid, scratch, scratch) == 0) {
          memcpy(fpx, scratch, sizeof(struct fpxregs));
        } else {
          /* Kernel lacks PTRACE_GETFPXREGS; core will omit the SSE note. */
          *hasSSE = 0;
        }
#else
        *hasSSE = 0;
#endif
        rc = 1;
      }
    }
  }
  sys_ptrace_detach(pid);

  /* Need to double-fork, so that "init" can reap the core writer upon EOF. */
  switch (sys_fork()) {
    case -1:
      return 0; /* fork failed; report failure to the caller */
    case 0:
      return rc; /* grandchild continues with the dump */
    default:
      sys__exit(0); /* intermediate child exits immediately */
  }
#endif
}
1567
1568 /* Internal function for generating a core file. This function works for
1569 * both single- and multi-threaded core files. It assumes that all threads
1570 * are already suspended, and will resume them before returning.
1571 *
1572 * The caller must make sure that prctl(PR_SET_DUMPABLE, 1) has been called,
1573 * or this function might fail.
1574 */
InternalGetCoreDump(void * frame,int num_threads,pid_t * pids,va_list ap)1575 int InternalGetCoreDump(void *frame, int num_threads, pid_t *pids,
1576 va_list ap
1577 /* const struct CoreDumpParameters *params,
1578 const char *file_name,
1579 const char *PATH
1580 */) {
1581 long i;
1582 int rc = -1, fd = -1, threads = num_threads, hasSSE = 1;
1583 struct core_user user, *puser = &user;
1584 prpsinfo prpsinfo;
1585 prstatus prstatus;
1586 regs thread_regs[threads];
1587 fpregs thread_fpregs[threads];
1588 fpxregs thread_fpxregs[threads];
1589 int pair[2];
1590 int main_pid = ((Frame *)frame)->tid;
1591
1592 const struct CoreDumpParameters *params = va_arg(ap, const struct CoreDumpParameters *);
1593
1594 int (*callback_fn)(void *) = GetCoreDumpParameter(params, callback_fn);
1595 if (callback_fn) {
1596 void *arg = GetCoreDumpParameter(params, callback_arg);
1597 if (callback_fn(arg) != 0) {
1598 goto error;
1599 }
1600 }
1601
1602 /* Get thread status */
1603 memset(puser, 0, sizeof(struct core_user));
1604 memset(thread_regs, 0, threads * sizeof(struct regs));
1605 memset(thread_fpregs, 0, threads * sizeof(struct fpregs));
1606 memset(thread_fpxregs, 0, threads * sizeof(struct fpxregs));
1607
1608 /* Threads are already attached, read their registers now */
1609 #ifdef THREADS
1610 for (i = 0; i < threads; i++) {
1611 char scratch[4096];
1612 #ifdef __mips__
1613 /* MIPS kernels do not support PTRACE_GETREGS, instead we have to call
1614 * PTRACE_PEEKUSER go retrieve individual CPU registers. The indices
1615 * for these registers do not exactly match with the order in the
1616 * structures that get written to the core file, either. We use a lookup
1617 * table to do the mapping.
1618 * Incidentally, this also means that on MIPS we cannot use
1619 * PTRACE_PEEKUSER to fill "struct core_user". There just is no such thing
1620 * as a NT_PRXREG in our MIPS core files.
1621 */
1622 static const int map[sizeof(struct regs) / sizeof(long)] = {
1623 -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
1624 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 67, 68, 64, 66, 69, 65, -1};
1625 int j;
1626 for (j = 0; j < sizeof(struct regs) / sizeof(long); j++) {
1627 if (map[j] >= 0 && sys_ptrace(PTRACE_PEEKUSER, pids[i], (void *)map[j], (unsigned long *)(thread_regs + i) + j)) {
1628 ResumeAllProcessThreads(threads, pids);
1629 goto error;
1630 }
1631 }
1632
1633 /* Older kernels do not support PTRACE_GETFPREGS, and require calling
1634 * PTRACE_PEEKUSER. This is a little awkward because of the layout of
1635 * "struct fpregs" that expands all 32bit variables to 64bits.
1636 */
1637 memset(thread_fpregs + i, 0xFF, sizeof(struct fpregs));
1638 for (j = 0; j < 32; j++) {
1639 if (sys_ptrace(PTRACE_PEEKUSER, pids[i], (void *)(32 + j), (uint64_t *)(thread_fpregs + i) + j)) {
1640 ResumeAllProcessThreads(threads, pids);
1641 goto error;
1642 }
1643 }
1644 if (sys_ptrace(PTRACE_PEEKUSER, pids[i], (void *)69, scratch) == 0) {
1645 memcpy(&thread_fpregs[i].fcr31, scratch, sizeof(thread_fpregs[i].fcr31));
1646 }
1647 if (sys_ptrace(PTRACE_PEEKUSER, pids[i], (void *)70, scratch) == 0) {
1648 memcpy(&thread_fpregs[i].fir, scratch, sizeof(thread_fpregs[i].fir));
1649 }
1650
1651 /* If the kernel supports it, PTRACE_GETFPREGS is a better way to
1652 * retrieve the FP registers.
1653 */
1654 if (sys_ptrace(PTRACE_GETFPREGS, pids[i], scratch, scratch) == 0) {
1655 memcpy(thread_fpregs + i, scratch, sizeof(struct fpregs));
1656 }
1657
1658 /* Set the saved integer registers, if we are looking at the thread that
1659 * called us.
1660 */
1661 if (main_pid == pids[i]) {
1662 SET_FRAME(*(Frame *)frame, thread_regs[i]);
1663 }
1664 hasSSE = 0;
1665 #else
1666 memset(scratch, 0xFF, sizeof(scratch));
1667 if (sys_ptrace(PTRACE_GETREGS, pids[i], scratch, scratch) == 0) {
1668 memcpy(thread_regs + i, scratch, sizeof(struct regs));
1669 if (main_pid == pids[i]) {
1670 SET_FRAME(*(Frame *)frame, thread_regs[i]);
1671 }
1672 memset(scratch, 0xFF, sizeof(scratch));
1673 if (sys_ptrace(PTRACE_GETFPREGS, pids[i], scratch, scratch) == 0) {
1674 memcpy(thread_fpregs + i, scratch, sizeof(struct fpregs));
1675 memset(scratch, 0xFF, sizeof(scratch));
1676 #if defined(__i386__) && !defined(__x86_64__)
1677 /* Linux on x86-64 stores all FPU registers in the SSE structure */
1678 if (sys_ptrace(PTRACE_GETFPXREGS, pids[i], scratch, scratch) == 0) {
1679 memcpy(thread_fpxregs + i, scratch, sizeof(struct fpxregs));
1680 } else {
1681 hasSSE = 0;
1682 }
1683 #else
1684 hasSSE = 0;
1685 #endif
1686 } else {
1687 goto ptrace;
1688 }
1689 } else {
1690 ptrace: /* Oh, well, undo everything and get out of here */
1691 ResumeAllProcessThreads(threads, pids);
1692 goto error;
1693 }
1694 #endif
1695 }
1696
1697 /* Get parent's CPU registers, and user data structure */
1698 {
1699 #ifndef __mips__
1700 for (i = 0; i < sizeof(struct core_user); i += sizeof(int)) {
1701 sys_ptrace(PTRACE_PEEKUSER, pids[0], (void *)i, ((char *)&user) + i);
1702 }
1703 /* Avoid using GCC's builtin memcpy... causes crashes in GCC 8.x at -O1?
1704 * I could not discover why this is... we are copying from one stack
1705 * buffer to another, so it's hard to imagine what could go wrong.
1706 * Unfortunately my assembly-fu is not sufficient to figure it out. */
1707
1708 /* Overwrite the regs from ptrace with the ones previously computed. */
1709 my_memcpy(&user.regs, thread_regs, sizeof(struct regs));
1710 #else
1711 puser = NULL;
1712 #endif
1713 }
1714 #endif
1715
1716 /* Build the PRPSINFO data structure */
1717 memset(&prpsinfo, 0, sizeof(struct prpsinfo));
1718 prpsinfo.pr_sname = 'R';
1719 prpsinfo.pr_nice = sys_getpriority(PRIO_PROCESS, 0);
1720 prpsinfo.pr_uid = sys_geteuid();
1721 prpsinfo.pr_gid = sys_getegid();
1722 prpsinfo.pr_pid = main_pid;
1723 prpsinfo.pr_ppid = sys_getppid();
1724 prpsinfo.pr_pgrp = sys_getpgrp();
1725 prpsinfo.pr_sid = sys_getsid(0);
1726 /* scope */ {
1727 char scratch[4096], *cmd = scratch, *ptr;
1728 ssize_t size, len;
1729 int cmd_fd;
1730 memset(&scratch, 0, sizeof(scratch));
1731 size = sys_readlink("/proc/self/exe", scratch, sizeof(scratch));
1732 len = 0;
1733 for (ptr = cmd; *ptr != '\000' && size-- > 0; ptr++) {
1734 if (*ptr == '/') {
1735 cmd = ptr + 1;
1736 len = 0;
1737 } else
1738 len++;
1739 }
1740 memcpy(prpsinfo.pr_fname, cmd, len > sizeof(prpsinfo.pr_fname) ? sizeof(prpsinfo.pr_fname) : len);
1741 NO_INTR(cmd_fd = sys_open("/proc/self/cmdline", O_RDONLY, 0));
1742 if (cmd_fd >= 0) {
1743 char *ptr;
1744 ssize_t size = c_read(cmd_fd, &prpsinfo.pr_psargs, sizeof(prpsinfo.pr_psargs), &errno);
1745 for (ptr = prpsinfo.pr_psargs; size-- > 0; ptr++)
1746 if (*ptr == '\000') *ptr = ' ';
1747 NO_INTR(sys_close(cmd_fd));
1748 }
1749 }
1750
1751 /* Build the PRSTATUS data structure */
1752 /* scope */ {
1753 int stat_fd;
1754 memset(&prstatus, 0, sizeof(struct prstatus));
1755 prstatus.pr_pid = prpsinfo.pr_pid;
1756 prstatus.pr_ppid = prpsinfo.pr_ppid;
1757 prstatus.pr_pgrp = prpsinfo.pr_pgrp;
1758 prstatus.pr_sid = prpsinfo.pr_sid;
1759 prstatus.pr_fpvalid = 1;
1760 NO_INTR(stat_fd = sys_open("/proc/self/stat", O_RDONLY, 0));
1761 if (stat_fd >= 0) {
1762 char scratch[4096];
1763 ssize_t size = c_read(stat_fd, scratch, sizeof(scratch) - 1, &errno);
1764 if (size >= 0) {
1765 unsigned long tms;
1766 char *ptr = scratch;
1767 scratch[size] = '\000';
1768
1769 /* User time */
1770 for (i = 13; i && *ptr; ptr++)
1771 if (*ptr == ' ') i--;
1772 tms = 0;
1773 while (*ptr && *ptr != ' ') tms = 10 * tms + *ptr++ - '0';
1774 prstatus.pr_utime.tv_sec = tms / 1000;
1775 prstatus.pr_utime.tv_usec = (tms % 1000) * 1000;
1776
1777 /* System time */
1778 if (*ptr) ptr++;
1779 tms = 0;
1780 while (*ptr && *ptr != ' ') tms = 10 * tms + *ptr++ - '0';
1781 prstatus.pr_stime.tv_sec = tms / 1000;
1782 prstatus.pr_stime.tv_usec = (tms % 1000) * 1000;
1783
1784 /* Cumulative user time */
1785 if (*ptr) ptr++;
1786 tms = 0;
1787 while (*ptr && *ptr != ' ') tms = 10 * tms + *ptr++ - '0';
1788 prstatus.pr_cutime.tv_sec = tms / 1000;
1789 prstatus.pr_cutime.tv_usec = (tms % 1000) * 1000;
1790
1791 /* Cumulative system time */
1792 if (*ptr) ptr++;
1793 tms = 0;
1794 while (*ptr && *ptr != ' ') tms = 10 * tms + *ptr++ - '0';
1795 prstatus.pr_cstime.tv_sec = tms / 1000;
1796 prstatus.pr_cstime.tv_usec = (tms % 1000) * 1000;
1797
1798 /* Pending signals */
1799 for (i = 14; i && *ptr; ptr++)
1800 if (*ptr == ' ') i--;
1801 while (*ptr && *ptr != ' ') prstatus.pr_sigpend = 10 * prstatus.pr_sigpend + *ptr++ - '0';
1802
1803 /* Held signals */
1804 if (*ptr) ptr++;
1805 while (*ptr && *ptr != ' ') prstatus.pr_sigpend = 10 * prstatus.pr_sigpend + *ptr++ - '0';
1806 }
1807 NO_INTR(sys_close(stat_fd));
1808 }
1809 }
1810
1811 /* scope */ {
1812 int openmax = sys_sysconf(_SC_OPEN_MAX);
1813 int pagesize = sys_sysconf(_SC_PAGESIZE);
1814 struct kernel_sigset_t old_signals, blocked_signals;
1815
1816 const char *file_name = va_arg(ap, const char *);
1817 size_t max_length = GetCoreDumpParameter(params, max_length);
1818 const char *PATH = va_arg(ap, const char *);
1819 const struct CoredumperCompressor *compressors = GetCoreDumpParameter(params, compressors);
1820 const struct CoredumperCompressor **selected_compressor =
1821 (const struct CoredumperCompressor **)GetCoreDumpParameter(params, selected_compressor);
1822 int prioritize = GetCoreDumpParameter(params, flags) & COREDUMPER_FLAG_LIMITED_BY_PRIORITY;
1823 const struct CoredumperNote *notes = GetCoreDumpParameter(params, notes);
1824 int note_count = GetCoreDumpParameter(params, note_count);
1825
1826 if (selected_compressor != NULL) {
1827 /* For now, assume that the core dump is uncompressed; we will later
1828 * override this setting, if we can find a suitable compressor program.
1829 */
1830 *selected_compressor = compressors;
1831 while (*selected_compressor && (*selected_compressor)->compressor != NULL) {
1832 ++*selected_compressor;
1833 }
1834 }
1835
1836 if (file_name == NULL) {
1837 /* Create a file descriptor that can be used for reading data from
1838 * our child process. This is a little complicated because we need
1839 * to make sure there is no race condition with other threads
1840 * calling fork() at the same time (this is somewhat mitigated,
1841 * because our threads are supposedly suspended at this time). We
1842 * have to avoid other processes holding our file handles open. We
1843 * can do this by creating the pipe in the child and passing the
1844 * file handle back to the parent.
1845 */
1846 if (sys_socketpair(AF_UNIX, SOCK_STREAM, 0, pair) >= 0) {
1847 /* Block signals prior to forking. Technically, POSIX requires
1848 * us to call pthread_sigmask(), if this is a threaded
1849 * application. When using glibc, we are OK calling
1850 * sigprocmask(), though. We will end up blocking additional
1851 * signals that libpthread uses internally, but that
1852 * is actually exactly what we want.
1853 *
1854 * Also, POSIX claims that this should not actually be
1855 * necessarily, but reality says otherwise.
1856 */
1857 sys_sigfillset(&blocked_signals);
1858 sys_sigprocmask(SIG_BLOCK, &blocked_signals, &old_signals);
1859
1860 /* Create a new core dump in child process; call sys_fork() in order to
1861 * avoid complications with pthread_atfork() handlers. In the child
1862 * process, we should only ever call system calls.
1863 */
1864 if ((rc = sys_fork()) == 0) {
1865 int fds[2];
1866
1867 /* Create a pipe for communicating between processes. If
1868 * necessary, add a compressor to the pipeline.
1869 */
1870 if (CreatePipeline(fds, openmax, PATH, &compressors) < 0 || (fds[0] < 0 && sys_pipe(fds) < 0)) {
1871 sys__exit(1);
1872 }
1873
1874 /* Pass file handle to parent */
1875 /* scope */ {
1876 char cmsg_buf[CMSG_SPACE(sizeof(int))];
1877 struct kernel_iovec iov;
1878 struct kernel_msghdr msg;
1879 struct cmsghdr *cmsg;
1880 memset(&iov, 0, sizeof(iov));
1881 memset(&msg, 0, sizeof(msg));
1882 iov.iov_base = (void *)&compressors;
1883 iov.iov_len = sizeof(compressors);
1884 msg.msg_iov = &iov;
1885 msg.msg_iovlen = 1;
1886 msg.msg_control = &cmsg_buf;
1887 msg.msg_controllen = sizeof(cmsg_buf);
1888 cmsg = CMSG_FIRSTHDR(&msg);
1889 if (!cmsg) {
1890 /* This can't happen, but static analyzers still complain... */
1891 sys__exit(1);
1892 }
1893 cmsg->cmsg_level = SOL_SOCKET;
1894 cmsg->cmsg_type = SCM_RIGHTS;
1895 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
1896 *(int *)CMSG_DATA(cmsg) = fds[0];
1897 while (sys_sendmsg(pair[1], &msg, 0) < 0) {
1898 if (errno != EINTR) sys__exit(1);
1899 }
1900 while (sys_shutdown(pair[1], SHUT_RDWR) < 0) {
1901 if (errno != EINTR) sys__exit(1);
1902 }
1903 }
1904
1905 /* Close all file handles other than the write end of our pipe */
1906 for (i = 0; i < openmax; i++) {
1907 if (i != fds[1]) {
1908 NO_INTR(sys_close(i));
1909 }
1910 }
1911
1912 /* If compiled without threading support, this is the only
1913 * place where we can request the parent's CPU
1914 * registers. This function is a no-op when threading
1915 * support is available.
1916 */
1917 if (!GetParentRegs(frame, thread_regs, thread_fpregs, thread_fpxregs, &hasSSE)) {
1918 sys__exit(1);
1919 }
1920
1921 CreateElfCore(&fds[1], SimpleWriter, SimpleDone, &prpsinfo, puser, &prstatus, threads, pids, thread_regs,
1922 thread_fpregs, hasSSE ? thread_fpxregs : NULL, pagesize, 0, main_pid, notes, note_count);
1923 NO_INTR(sys_close(fds[1]));
1924 sys__exit(0);
1925
1926 /* Make the compiler happy. We never actually get here. */
1927 return 0;
1928 } else if (rc > 0) {
1929 #ifndef THREADS
1930 /* Child will double-fork, so reap the process, now. */
1931 sys_waitpid(rc, (void *)0, __WALL);
1932 #endif
1933 }
1934
1935 /* In the parent */
1936 sys_sigprocmask(SIG_SETMASK, &old_signals, (struct kernel_sigset_t *)0);
1937 NO_INTR(sys_close(pair[1]));
1938
1939 /* Get pipe file handle from child */
1940 /* scope */ {
1941 const struct CoredumperCompressor *buffer[1];
1942 char cmsg_buf[CMSG_SPACE(sizeof(int))];
1943 struct kernel_iovec iov;
1944 struct kernel_msghdr msg;
1945 for (;;) {
1946 int nbytes;
1947 memset(&iov, 0, sizeof(iov));
1948 memset(&msg, 0, sizeof(msg));
1949 iov.iov_base = buffer;
1950 iov.iov_len = sizeof(void *);
1951 msg.msg_iov = &iov;
1952 msg.msg_iovlen = 1;
1953 msg.msg_control = &cmsg_buf;
1954 msg.msg_controllen = sizeof(cmsg_buf);
1955 if ((nbytes = sys_recvmsg(pair[0], &msg, 0)) > 0) {
1956 struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
1957 if (cmsg != NULL && cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
1958 fd = *(int *)CMSG_DATA(cmsg);
1959 if (nbytes == sizeof(void *) && *buffer != NULL && selected_compressor != NULL)
1960 *selected_compressor = *buffer;
1961 break;
1962 } else if (nbytes == 0 || errno != EINTR) {
1963 break;
1964 }
1965 }
1966 }
1967 sys_shutdown(pair[0], SHUT_RDWR);
1968 NO_INTR(sys_close(pair[0]));
1969 }
1970 } else {
1971 /* Synchronously write the core to a file. If necessary, compress the
1972 * data on the fly. All other threads are suspended during this time.
1973 * In principle, we could use the same code that we used earlier for
1974 * building a core file on the fly. But that results in creating a COW
1975 * copy of the address space (as a result of the call to fork()), and
1976 * some accounting applications are sensitive to the sudden spike in
1977 * memory usage.
1978 * So, instead, we run a single thread and make use of callback
1979 * functions that internally invoke poll() for managing the I/O.
1980 */
1981 int fds[2] = {-1, -1};
1982 int saved_errno, rc;
1983 const char *suffix = "";
1984 struct WriterFds writer_fds;
1985 ssize_t (*writer)(void *, const void *, size_t);
1986
1987 /* If compiled without threading support, this is the only
1988 * place where we can request the parent's CPU
1989 * registers. This function is a no-op when threading
1990 * support is available.
1991 */
1992 if (!GetParentRegs(frame, thread_regs, thread_fpregs, thread_fpxregs, &hasSSE)) {
1993 goto error;
1994 }
1995
1996 /* Create a pipe for communicating between processes. If
1997 * necessary, add a compressor to the pipeline.
1998 */
1999 if (compressors != NULL && compressors->compressor != NULL) {
2000 if (CreatePipeline(fds, openmax, PATH, &compressors) < 0) {
2001 goto error;
2002 }
2003 }
2004 if (selected_compressor) {
2005 *selected_compressor = compressors;
2006 }
2007
2008 writer_fds.out_fd = -1;
2009 if (max_length > 0) {
2010 /* Open the output file. If necessary, pick a filename suffix that
2011 * matches the selected compression type.
2012 */
2013 if (compressors != NULL && compressors->compressor != NULL && compressors->suffix != NULL) {
2014 suffix = compressors->suffix;
2015 }
2016 /* scope */ {
2017 const int kOpenFlags = O_WRONLY | O_CREAT | O_TRUNC;
2018 char extended_file_name[strlen(file_name) + strlen(suffix) + 1];
2019 strcat(strcpy(extended_file_name, file_name), suffix);
2020 NO_INTR(writer_fds.out_fd = sys_open(extended_file_name, kOpenFlags | O_LARGEFILE, 0600));
2021 if (writer_fds.out_fd < 0 && EINVAL == errno && O_LARGEFILE) {
2022 /* This kernel appears not to have large file support.
2023 * Try again without O_LARGEFILE.
2024 */
2025 NO_INTR(writer_fds.out_fd = sys_open(extended_file_name, kOpenFlags, 0600));
2026 }
2027 if (writer_fds.out_fd < 0) {
2028 saved_errno = errno;
2029 if (fds[0] >= 0) NO_INTR(sys_close(fds[0]));
2030 if (fds[1] >= 0) NO_INTR(sys_close(fds[1]));
2031 errno = saved_errno;
2032 goto error;
2033 }
2034 }
2035
2036 /* Set up a suitable writer function. */
2037 writer_fds.max_length = max_length;
2038 if (fds[0] >= 0) {
2039 /* The PipeWriter() can deal with multi I/O requests on the
2040 * compression pipeline.
2041 */
2042 long flags;
2043 NO_INTR(flags = sys_fcntl(fds[0], F_GETFL, 0));
2044 NO_INTR(sys_fcntl(fds[0], F_SETFL, flags | O_NONBLOCK));
2045 NO_INTR(flags = sys_fcntl(fds[1], F_GETFL, 0));
2046 NO_INTR(sys_fcntl(fds[1], F_SETFL, flags | O_NONBLOCK));
2047 writer_fds.write_fd = fds[1];
2048 writer_fds.compressed_fd = fds[0];
2049 writer = PipeWriter;
2050 } else {
2051 /* If no compression is needed, then we can directly write to the
2052 * file. This avoids quite a bit of unnecessary overhead.
2053 */
2054 writer = LimitWriter;
2055 }
2056
2057 rc = CreateElfCore(&writer_fds, writer, PipeDone, &prpsinfo, puser, &prstatus, threads, pids, thread_regs,
2058 thread_fpregs, hasSSE ? thread_fpxregs : NULL, pagesize, prioritize ? max_length : 0,
2059 main_pid, notes, note_count);
2060 if (fds[0] >= 0) {
2061 saved_errno = errno;
2062 /* Close the input side of the compression pipeline, and flush
2063 * the remaining compressed data bytes out to the file.
2064 */
2065 if (fds[1] >= 0) {
2066 NO_INTR(sys_close(fds[1]));
2067 fds[1] = -1;
2068 }
2069 if (FlushPipe(&writer_fds) < 0) {
2070 rc = -1;
2071 } else {
2072 errno = saved_errno;
2073 }
2074 }
2075 } else {
2076 rc = 0;
2077 }
2078
2079 /* Close all remaining open file handles. */
2080 saved_errno = errno;
2081 if (writer_fds.out_fd >= 0) NO_INTR(sys_close(writer_fds.out_fd));
2082 if (fds[0] >= 0) NO_INTR(sys_close(fds[0]));
2083 if (fds[1] >= 0) NO_INTR(sys_close(fds[1]));
2084 errno = saved_errno;
2085
2086 if (rc < 0) {
2087 goto error;
2088 }
2089
2090 /* If called with a filename, we do not actually return a file handle,
2091 * but instead just signal whether the core file has been written
2092 * successfully.
2093 */
2094 fd = 0;
2095 }
2096 }
2097
2098 ResumeAllProcessThreads(threads, pids);
2099 return fd;
2100
2101 error:
2102 /* scope */ {
2103 int saved_errno = errno;
2104 if (fd > 0) NO_INTR(sys_close(fd));
2105 errno = saved_errno;
2106 }
2107 ResumeAllProcessThreads(threads, pids);
2108 return -1;
2109 }
2110
2111 #ifdef __cplusplus
2112 }
2113 #endif
2114 #endif
2115