1 // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
2 // Copyright (c) 2006, Google Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #include <config.h>
// PLATFORM_WINDOWS marks native Windows builds (MSVC or MinGW).  Cygwin is
// excluded because the /proc-based code paths in this file handle it.
// __CYGWIN32__ is the spelling the rest of this file tests for; the older
// __CYGWIN32 check is kept so that no existing configuration changes.
#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && \
    !defined(__CYGWIN32__) && !defined(__CYGWIN32)
# define PLATFORM_WINDOWS 1
#endif
35 
36 #include <ctype.h>    // for isspace()
37 #include <stdlib.h>   // for getenv()
38 #include <stdio.h>    // for snprintf(), sscanf()
39 #include <string.h>   // for memmove(), memchr(), etc.
40 #include <fcntl.h>    // for open()
41 #include <errno.h>    // for errno
42 #ifdef HAVE_UNISTD_H
43 #include <unistd.h>   // for read()
44 #endif
45 #if defined __MACH__          // Mac OS X, almost certainly
46 #include <mach-o/dyld.h>      // for iterating over dll's in ProcMapsIter
47 #include <mach-o/loader.h>    // for iterating over dll's in ProcMapsIter
48 #include <sys/types.h>
49 #include <sys/sysctl.h>       // how we figure out numcpu's on OS X
50 #elif defined __FreeBSD__
51 #include <sys/sysctl.h>
52 #elif defined __sun__         // Solaris
53 #include <procfs.h>           // for, e.g., prmap_t
54 #elif defined(PLATFORM_WINDOWS)
55 #include <process.h>          // for getpid() (actually, _getpid())
56 #include <shlwapi.h>          // for SHGetValueA()
57 #include <tlhelp32.h>         // for Module32First()
58 #endif
59 #include "base/sysinfo.h"
60 #include "base/commandlineflags.h"
61 #include "base/dynamic_annotations.h"   // for RunningOnValgrind
62 #include "base/logging.h"
63 
#if defined(PLATFORM_WINDOWS)
# if defined(MODULEENTRY32)
// tlhelp32.h departs from the usual A/W naming pattern: there is no A
// variant of MODULEENTRY32, and in unicode mode the header #defines
// MODULEENTRY32 to be MODULEENTRY32W.  Dropping these macros is the only
// way seen so far to reach the original ascii struct and its functions.
#  undef MODULEENTRY32
#  undef Module32First
#  undef Module32Next
#  undef PMODULEENTRY32
#  undef LPMODULEENTRY32
# endif  /* MODULEENTRY32 */
// MinGW doesn't seem to provide this flag, and some Windows SDKs may not
// either; fall back to 0 so it can be OR'ed into the snapshot flags.
# if !defined(TH32CS_SNAPMODULE32)
#  define TH32CS_SNAPMODULE32  0
# endif  /* TH32CS_SNAPMODULE32 */
#endif  /* PLATFORM_WINDOWS */
82 
// Evaluates |call| over and over for as long as it yields a negative value
// while errno is EINTR.  |call| is usually an assignment such as
// `n = read(...)`, so the last result stays available to the caller.
#define NO_INTR(call)  do { } while ((call) < 0 && errno == EINTR)
85 
// The libc open/read/close wrappers can set errno, which may be illegal at
// the point these are used, so issue the raw system call whenever the
// corresponding syscall number is available and fall back to libc otherwise.
#if defined(HAVE_SYS_SYSCALL_H)
# include <sys/syscall.h>
#endif

#if !defined(SYS_open)  // solaris 11, at least sometimes, only defines SYS_openat
# define safeopen(path, flags)  open(path, flags)
#else
# define safeopen(path, flags)  syscall(SYS_open, path, flags)
#endif

#if !defined(SYS_read)
# define saferead(fd, dest, count)  read(fd, dest, count)
#else
# define saferead(fd, dest, count)  syscall(SYS_read, fd, dest, count)
#endif

#if !defined(SYS_close)
# define safeclose(fd)  close(fd)
#else
# define safeclose(fd)  syscall(SYS_close, fd)
#endif
106 
107 // ----------------------------------------------------------------------
108 // GetenvBeforeMain()
109 // GetUniquePathFromEnv()
110 //    Some non-trivial getenv-related functions.
111 // ----------------------------------------------------------------------
112 
// memcmp and friends are reimplemented here so that nothing in this file
// depends on glibc calls too early in the process lifetime.  That is what
// lets GetenvBeforeMain be used from inside an ifunc handler.

// Compares the first |n| bytes of |_a| and |_b| as unsigned bytes.
// Returns 0 when they are all equal; otherwise -1 or 1 according to whether
// the first differing byte of |_a| is smaller or larger than that of |_b|.
static int slow_memcmp(const void *_a, const void *_b, size_t n) {
  const uint8_t *lhs = reinterpret_cast<const uint8_t *>(_a);
  const uint8_t *rhs = reinterpret_cast<const uint8_t *>(_b);
  for (size_t i = 0; i < n; ++i) {
    if (lhs[i] != rhs[i]) {
      return (lhs[i] < rhs[i]) ? -1 : 1;
    }
  }
  return 0;
}
131 
// Returns a pointer to the first of the |n| bytes starting at |s| that equals
// |c| (converted to an unsigned byte), or NULL if there is none.
//
// Both sides of the comparison are converted to unsigned bytes.  Comparing a
// plain (possibly signed) char against the unsigned target would never match
// bytes >= 0x80 on platforms where char is signed.
static const char *slow_memchr(const char *s, int c, size_t n) {
  const uint8_t wanted = static_cast<uint8_t>(c);
  for (; n != 0; --n, ++s) {
    if (static_cast<uint8_t>(*s) == wanted) {
      return s;
    }
  }
  return NULL;
}
141 
slow_strlen(const char * s)142 static size_t slow_strlen(const char *s) {
143   const char *s2 = slow_memchr(s, '\0', static_cast<size_t>(-1));
144   return s2 - s;
145 }
146 
147 // It's not safe to call getenv() in the malloc hooks, because they
148 // might be called extremely early, before libc is done setting up
149 // correctly.  In particular, the thread library may not be done
150 // setting up errno.  So instead, we use the built-in __environ array
151 // if it exists, and otherwise read /proc/self/environ directly, using
152 // system calls to read the file, and thus avoid setting errno.
153 // /proc/self/environ has a limit of how much data it exports (around
154 // 8K), so it's not an ideal solution.
GetenvBeforeMain(const char * name)155 const char* GetenvBeforeMain(const char* name) {
156   const int namelen = slow_strlen(name);
157 #if defined(HAVE___ENVIRON)   // if we have it, it's declared in unistd.h
158   if (__environ) {            // can exist but be NULL, if statically linked
159     for (char** p = __environ; *p; p++) {
160       if (!slow_memcmp(*p, name, namelen) && (*p)[namelen] == '=')
161         return *p + namelen+1;
162     }
163     return NULL;
164   }
165 #endif
166 #if defined(PLATFORM_WINDOWS)
167   // TODO(mbelshe) - repeated calls to this function will overwrite the
168   // contents of the static buffer.
169   static char envvar_buf[1024];  // enough to hold any envvar we care about
170   if (!GetEnvironmentVariableA(name, envvar_buf, sizeof(envvar_buf)-1))
171     return NULL;
172   return envvar_buf;
173 #endif
174   // static is ok because this function should only be called before
175   // main(), when we're single-threaded.
176   static char envbuf[16<<10];
177   if (*envbuf == '\0') {    // haven't read the environ yet
178     int fd = safeopen("/proc/self/environ", O_RDONLY);
179     // The -2 below guarantees the last two bytes of the buffer will be \0\0
180     if (fd == -1 ||           // unable to open the file, fall back onto libc
181         saferead(fd, envbuf, sizeof(envbuf) - 2) < 0) { // error reading file
182       RAW_VLOG(1, "Unable to open /proc/self/environ, falling back "
183                "on getenv(\"%s\"), which may not work", name);
184       if (fd != -1) safeclose(fd);
185       return getenv(name);
186     }
187     safeclose(fd);
188   }
189   const char* p = envbuf;
190   while (*p != '\0') {    // will happen at the \0\0 that terminates the buffer
191     // proc file has the format NAME=value\0NAME=value\0NAME=value\0...
192     const char* endp = (char*)slow_memchr(p, '\0',
193                                           sizeof(envbuf) - (p - envbuf));
194     if (endp == NULL)            // this entry isn't NUL terminated
195       return NULL;
196     else if (!slow_memcmp(p, name, namelen) && p[namelen] == '=')    // it's a match
197       return p + namelen+1;      // point after =
198     p = endp + 1;
199   }
200   return NULL;                   // env var never found
201 }
202 
203 extern "C" {
TCMallocGetenvSafe(const char * name)204   const char* TCMallocGetenvSafe(const char* name) {
205     return GetenvBeforeMain(name);
206   }
207 }
208 
// Reads the environment variable |env_name| (for example CPUPROFILE), whose
// value is expected to be a file path, copies a path derived from it into
// |path| (at most PATH_MAX bytes) and returns true.  If the variable is unset
// or empty, |path| is left untouched and false is returned.
//
// The derivation exists so that child processes do not reuse the parent's
// path: a child gets "_<pid>" appended.  Ideally the parent would set a
// separate environment variable for its children to inherit, but -- and this
// is seemingly a bug in gcc -- a setenv() done from a global constructor in a
// shared library is lost by the time main() is called.  The only safe option
// in that situation is to modify the existing variable in place, hence this
// hack:
//   * the first process to see the value uses it as-is and then sets the
//     high bit of its first character;
//   * any later reader that finds the high bit set clears it in its copy and
//     appends its own pid.
// This assumes such file names do not normally start with a character that
// has the high bit set.  If they do, a profile is still produced, just under
// an unexpected name.
// TODO(csilvers): set an envvar instead when we can do it reliably.
bool GetUniquePathFromEnv(const char* env_name, char* path) {
  char* const value = getenv(env_name);
  if (value == NULL || value[0] == '\0')
    return false;

  const bool already_marked = (value[0] & 128) != 0;
  if (!already_marked) {
    snprintf(path, PATH_MAX, "%s", value);
    value[0] |= 128;                      // leave the mark for kids to see
    return true;
  }

  // Marked value: strip the mark from the first character and add our pid.
  snprintf(path, PATH_MAX, "%c%s_%u",
           value[0] & 127, value + 1, static_cast<unsigned int>(getpid()));
  return true;
}
244 
// Blocks the calling thread for |milliseconds| milliseconds.
void SleepForMilliseconds(int milliseconds) {
#ifdef PLATFORM_WINDOWS
  _sleep(milliseconds);   // Windows's _sleep takes milliseconds argument
#else
  // Split the request into whole seconds plus leftover nanoseconds.
  struct timespec remaining;
  remaining.tv_sec = milliseconds / 1000;
  remaining.tv_nsec = (milliseconds % 1000) * 1000000;
  // nanosleep() writes the unslept time back into |remaining| when a signal
  // interrupts it, so retrying with the same struct waits out the rest of the
  // interval.  Any failure other than EINTR ends the wait.
  for (;;) {
    const int rc = nanosleep(&remaining, &remaining);
    if (rc == 0 || errno != EINTR)
      break;
  }
#endif
}
257 
// Returns the number of processors reported by the operating system.  On
// non-Windows platforms this is the online-processor count from sysconf(),
// and 1 is returned if that query fails.
int GetSystemCPUsCount()
{
#if defined(PLATFORM_WINDOWS)
  SYSTEM_INFO system_info;
  GetSystemInfo(&system_info);
  return  system_info.dwNumberOfProcessors;
#else
  const long online = sysconf(_SC_NPROCESSORS_ONLN);
  return (online < 0) ? 1 : static_cast<int>(online);
#endif
}
273 
274 // ----------------------------------------------------------------------
275 
#if defined __linux__ || defined __FreeBSD__ || defined __sun__ || defined __CYGWIN__ || defined __CYGWIN32__
// Formats |spec| into |buf| (capacity |buf_size| bytes) with snprintf.
// |spec| is used as the format string and is given exactly one int argument:
// |pid|, or the result of getpid() when |pid| is 0.  A |spec| without any
// conversion (e.g. a fixed path) simply ignores that argument, which is why
// callers pass a non-zero dummy pid in that case.
// CHECK_LT verifies that the formatted length is below |buf_size|, i.e. that
// the output was not truncated (presumably fatal on failure -- see
// base/logging.h).
static void ConstructFilename(const char* spec, pid_t pid,
                              char* buf, int buf_size) {
  CHECK_LT(snprintf(buf, buf_size,
                    spec,
                    static_cast<int>(pid ? pid : getpid())), buf_size);
}
#endif
284 
// Helper template that is only instantiated on Mach (OS X); the template
// parameters select the 32-bit or the 64-bit flavour of the header magic,
// segment load command and structure types.
//
// Examines load command number |current_load_cmd| of image |current_image|,
// whose header is |hdr|.  If the header magic is kMagic and that load command
// is a kLCSegment, every non-NULL output pointer is filled in and true is
// returned.  Otherwise nothing is written and false is returned.
#ifdef __MACH__          // Mac OS X, almost certainly
template<uint32_t kMagic, uint32_t kLCSegment,
         typename MachHeader, typename SegmentCommand>
static bool NextExtMachHelper(const mach_header* hdr,
                              int current_image, int current_load_cmd,
                              uint64 *start, uint64 *end, char **flags,
                              uint64 *offset, int64 *inode, char **filename,
                              uint64 *file_mapping, uint64 *file_pages,
                              uint64 *anon_mapping, uint64 *anon_pages,
                              dev_t *dev) {
  static char kDefaultPerms[5] = "r-xp";
  if (hdr->magic != kMagic)
    return false;

  // The load commands follow the header back to back; step over the ones in
  // front of ours using each command's own size.
  // TODO(csilvers): make this not-quadratic (increment and hold state)
  const char* cursor = reinterpret_cast<const char*>(hdr) + sizeof(MachHeader);
  for (int remaining = current_load_cmd; remaining > 0; --remaining)
    cursor += reinterpret_cast<const load_command*>(cursor)->cmdsize;

  const load_command* command = reinterpret_cast<const load_command*>(cursor);
  if (command->cmd != kLCSegment)
    return false;

  const SegmentCommand* segment =
      reinterpret_cast<const SegmentCommand*>(cursor);
  // Segment addresses are shifted by the image's slide.
  const intptr_t slide = _dyld_get_image_vmaddr_slide(current_image);
  if (start) *start = segment->vmaddr + slide;
  if (end) *end = segment->vmaddr + segment->vmsize + slide;
  if (flags) *flags = kDefaultPerms;  // can we do better?
  if (offset) *offset = segment->fileoff;
  if (inode) *inode = 0;
  if (filename)
    *filename = const_cast<char*>(_dyld_get_image_name(current_image));
  if (file_mapping) *file_mapping = 0;
  if (file_pages) *file_pages = 0;   // could we use segment->filesize?
  if (anon_mapping) *anon_mapping = 0;
  if (anon_pages) *anon_pages = 0;
  if (dev) *dev = 0;
  return true;
}
#endif
326 
327 // Finds |c| in |text|, and assign '\0' at the found position.
328 // The original character at the modified position should be |c|.
329 // A pointer to the modified position is stored in |endptr|.
330 // |endptr| should not be NULL.
ExtractUntilChar(char * text,int c,char ** endptr)331 static bool ExtractUntilChar(char *text, int c, char **endptr) {
332   CHECK_NE(text, NULL);
333   CHECK_NE(endptr, NULL);
334   char *found;
335   found = strchr(text, c);
336   if (found == NULL) {
337     *endptr = NULL;
338     return false;
339   }
340 
341   *endptr = found;
342   *found = '\0';
343   return true;
344 }
345 
// When the delimiter |c| is itself whitespace, advances |*text_pointer|
// through a run of whitespace so that it ends up on the LAST whitespace
// character of the run (callers step over that one themselves).  This mimics
// sscanf's whitespace handling, where a single whitespace in the format
// matches any amount of whitespace in the input.  Does nothing when |c| is
// not whitespace.
//
// The characters read from the text are converted to unsigned char before
// being passed to isspace(): calling it with a negative plain char (any byte
// >= 0x80 where char is signed) is undefined behavior.
static void SkipWhileWhitespace(char **text_pointer, int c) {
  if (!isspace(c))
    return;
  while (isspace(static_cast<unsigned char>(**text_pointer)) &&
         isspace(static_cast<unsigned char>((*text_pointer)[1]))) {
    ++(*text_pointer);
  }
}
355 
356 template<class T>
StringToInteger(char * text,char ** endptr,int base)357 static T StringToInteger(char *text, char **endptr, int base) {
358   assert(false);
359   return T();
360 }
361 
362 template<>
StringToInteger(char * text,char ** endptr,int base)363 int StringToInteger<int>(char *text, char **endptr, int base) {
364   return strtol(text, endptr, base);
365 }
366 
367 template<>
StringToInteger(char * text,char ** endptr,int base)368 int64 StringToInteger<int64>(char *text, char **endptr, int base) {
369   return strtoll(text, endptr, base);
370 }
371 
372 template<>
StringToInteger(char * text,char ** endptr,int base)373 uint64 StringToInteger<uint64>(char *text, char **endptr, int base) {
374   return strtoull(text, endptr, base);
375 }
376 
377 template<typename T>
StringToIntegerUntilChar(char * text,int base,int c,char ** endptr_result)378 static T StringToIntegerUntilChar(
379     char *text, int base, int c, char **endptr_result) {
380   CHECK_NE(endptr_result, NULL);
381   *endptr_result = NULL;
382 
383   char *endptr_extract;
384   if (!ExtractUntilChar(text, c, &endptr_extract))
385     return 0;
386 
387   T result;
388   char *endptr_strto;
389   result = StringToInteger<T>(text, &endptr_strto, base);
390   *endptr_extract = c;
391 
392   if (endptr_extract != endptr_strto)
393     return 0;
394 
395   *endptr_result = endptr_extract;
396   SkipWhileWhitespace(endptr_result, c);
397 
398   return result;
399 }
400 
CopyStringUntilChar(char * text,unsigned out_len,int c,char * out)401 static char *CopyStringUntilChar(
402     char *text, unsigned out_len, int c, char *out) {
403   char *endptr;
404   if (!ExtractUntilChar(text, c, &endptr))
405     return NULL;
406 
407   strncpy(out, text, out_len);
408   out[out_len-1] = '\0';
409   *endptr = c;
410 
411   SkipWhileWhitespace(&endptr, c);
412   return endptr;
413 }
414 
415 template<typename T>
StringToIntegerUntilCharWithCheck(T * outptr,char * text,int base,int c,char ** endptr)416 static bool StringToIntegerUntilCharWithCheck(
417     T *outptr, char *text, int base, int c, char **endptr) {
418   *outptr = StringToIntegerUntilChar<T>(*endptr, base, c, endptr);
419   if (*endptr == NULL || **endptr == '\0') return false;
420   ++(*endptr);
421   return true;
422 }
423 
ParseProcMapsLine(char * text,uint64 * start,uint64 * end,char * flags,uint64 * offset,int * major,int * minor,int64 * inode,unsigned * filename_offset)424 static bool ParseProcMapsLine(char *text, uint64 *start, uint64 *end,
425                               char *flags, uint64 *offset,
426                               int *major, int *minor, int64 *inode,
427                               unsigned *filename_offset) {
428 #if defined(__linux__)
429   /*
430    * It's similar to:
431    * sscanf(text, "%"SCNx64"-%"SCNx64" %4s %"SCNx64" %x:%x %"SCNd64" %n",
432    *        start, end, flags, offset, major, minor, inode, filename_offset)
433    */
434   char *endptr = text;
435   if (endptr == NULL || *endptr == '\0')  return false;
436 
437   if (!StringToIntegerUntilCharWithCheck(start, endptr, 16, '-', &endptr))
438     return false;
439 
440   if (!StringToIntegerUntilCharWithCheck(end, endptr, 16, ' ', &endptr))
441     return false;
442 
443   endptr = CopyStringUntilChar(endptr, 5, ' ', flags);
444   if (endptr == NULL || *endptr == '\0')  return false;
445   ++endptr;
446 
447   if (!StringToIntegerUntilCharWithCheck(offset, endptr, 16, ' ', &endptr))
448     return false;
449 
450   if (!StringToIntegerUntilCharWithCheck(major, endptr, 16, ':', &endptr))
451     return false;
452 
453   if (!StringToIntegerUntilCharWithCheck(minor, endptr, 16, ' ', &endptr))
454     return false;
455 
456   if (!StringToIntegerUntilCharWithCheck(inode, endptr, 10, ' ', &endptr))
457     return false;
458 
459   *filename_offset = (endptr - text);
460   return true;
461 #else
462   return false;
463 #endif
464 }
465 
ProcMapsIterator(pid_t pid)466 ProcMapsIterator::ProcMapsIterator(pid_t pid) {
467   Init(pid, NULL, false);
468 }
469 
ProcMapsIterator(pid_t pid,Buffer * buffer)470 ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer) {
471   Init(pid, buffer, false);
472 }
473 
ProcMapsIterator(pid_t pid,Buffer * buffer,bool use_maps_backing)474 ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer,
475                                    bool use_maps_backing) {
476   Init(pid, buffer, use_maps_backing);
477 }
478 
Init(pid_t pid,Buffer * buffer,bool use_maps_backing)479 void ProcMapsIterator::Init(pid_t pid, Buffer *buffer,
480                             bool use_maps_backing) {
481   pid_ = pid;
482   using_maps_backing_ = use_maps_backing;
483   dynamic_buffer_ = NULL;
484   if (!buffer) {
485     // If the user didn't pass in any buffer storage, allocate it
486     // now. This is the normal case; the signal handler passes in a
487     // static buffer.
488     buffer = dynamic_buffer_ = new Buffer;
489   } else {
490     dynamic_buffer_ = NULL;
491   }
492 
493   ibuf_ = buffer->buf_;
494 
495   stext_ = etext_ = nextline_ = ibuf_;
496   ebuf_ = ibuf_ + Buffer::kBufSize - 1;
497   nextline_ = ibuf_;
498 
499 #if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
500   if (use_maps_backing) {  // don't bother with clever "self" stuff in this case
501     ConstructFilename("/proc/%d/maps_backing", pid, ibuf_, Buffer::kBufSize);
502   } else if (pid == 0) {
503     // We have to kludge a bit to deal with the args ConstructFilename
504     // expects.  The 1 is never used -- it's only impt. that it's not 0.
505     ConstructFilename("/proc/self/maps", 1, ibuf_, Buffer::kBufSize);
506   } else {
507     ConstructFilename("/proc/%d/maps", pid, ibuf_, Buffer::kBufSize);
508   }
509   // No error logging since this can be called from the crash dump
510   // handler at awkward moments. Users should call Valid() before
511   // using.
512   NO_INTR(fd_ = open(ibuf_, O_RDONLY));
513 #elif defined(__FreeBSD__)
514   // We don't support maps_backing on freebsd
515   if (pid == 0) {
516     ConstructFilename("/proc/curproc/map", 1, ibuf_, Buffer::kBufSize);
517   } else {
518     ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize);
519   }
520   NO_INTR(fd_ = open(ibuf_, O_RDONLY));
521 #elif defined(__sun__)
522   if (pid == 0) {
523     ConstructFilename("/proc/self/map", 1, ibuf_, Buffer::kBufSize);
524   } else {
525     ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize);
526   }
527   NO_INTR(fd_ = open(ibuf_, O_RDONLY));
528 #elif defined(__MACH__)
529   current_image_ = _dyld_image_count();   // count down from the top
530   current_load_cmd_ = -1;
531 #elif defined(PLATFORM_WINDOWS)
532   snapshot_ = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE |
533                                        TH32CS_SNAPMODULE32,
534                                        GetCurrentProcessId());
535   memset(&module_, 0, sizeof(module_));
536 #else
537   fd_ = -1;   // so Valid() is always false
538 #endif
539 
540 }
541 
~ProcMapsIterator()542 ProcMapsIterator::~ProcMapsIterator() {
543 #if defined(PLATFORM_WINDOWS)
544   if (snapshot_ != INVALID_HANDLE_VALUE) CloseHandle(snapshot_);
545 #elif defined(__MACH__)
546   // no cleanup necessary!
547 #else
548   if (fd_ >= 0) NO_INTR(close(fd_));
549 #endif
550   delete dynamic_buffer_;
551 }
552 
Valid() const553 bool ProcMapsIterator::Valid() const {
554 #if defined(PLATFORM_WINDOWS)
555   return snapshot_ != INVALID_HANDLE_VALUE;
556 #elif defined(__MACH__)
557   return 1;
558 #else
559   return fd_ != -1;
560 #endif
561 }
562 
Next(uint64 * start,uint64 * end,char ** flags,uint64 * offset,int64 * inode,char ** filename)563 bool ProcMapsIterator::Next(uint64 *start, uint64 *end, char **flags,
564                             uint64 *offset, int64 *inode, char **filename) {
565   return NextExt(start, end, flags, offset, inode, filename, NULL, NULL,
566                  NULL, NULL, NULL);
567 }
568 
569 // This has too many arguments.  It should really be building
570 // a map object and returning it.  The problem is that this is called
571 // when the memory allocator state is undefined, hence the arguments.
NextExt(uint64 * start,uint64 * end,char ** flags,uint64 * offset,int64 * inode,char ** filename,uint64 * file_mapping,uint64 * file_pages,uint64 * anon_mapping,uint64 * anon_pages,dev_t * dev)572 bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags,
573                                uint64 *offset, int64 *inode, char **filename,
574                                uint64 *file_mapping, uint64 *file_pages,
575                                uint64 *anon_mapping, uint64 *anon_pages,
576                                dev_t *dev) {
577 
578 #if defined(__linux__) || defined(__FreeBSD__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
579   do {
580     // Advance to the start of the next line
581     stext_ = nextline_;
582 
583     // See if we have a complete line in the buffer already
584     nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ - stext_));
585     if (!nextline_) {
586       // Shift/fill the buffer so we do have a line
587       int count = etext_ - stext_;
588 
589       // Move the current text to the start of the buffer
590       memmove(ibuf_, stext_, count);
591       stext_ = ibuf_;
592       etext_ = ibuf_ + count;
593 
594       int nread = 0;            // fill up buffer with text
595       while (etext_ < ebuf_) {
596         NO_INTR(nread = read(fd_, etext_, ebuf_ - etext_));
597         if (nread > 0)
598           etext_ += nread;
599         else
600           break;
601       }
602 
603       // Zero out remaining characters in buffer at EOF to avoid returning
604       // garbage from subsequent calls.
605       if (etext_ != ebuf_ && nread == 0) {
606         memset(etext_, 0, ebuf_ - etext_);
607       }
608       *etext_ = '\n';   // sentinel; safe because ibuf extends 1 char beyond ebuf
609       nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ + 1 - stext_));
610     }
611     *nextline_ = 0;                // turn newline into nul
612     nextline_ += ((nextline_ < etext_)? 1 : 0);  // skip nul if not end of text
613     // stext_ now points at a nul-terminated line
614     uint64 tmpstart, tmpend, tmpoffset;
615     int64 tmpinode;
616     int major, minor;
617     unsigned filename_offset = 0;
618 #if defined(__linux__)
619     // for now, assume all linuxes have the same format
620     if (!ParseProcMapsLine(
621         stext_,
622         start ? start : &tmpstart,
623         end ? end : &tmpend,
624         flags_,
625         offset ? offset : &tmpoffset,
626         &major, &minor,
627         inode ? inode : &tmpinode, &filename_offset)) continue;
628 #elif defined(__CYGWIN__) || defined(__CYGWIN32__)
629     // cygwin is like linux, except the third field is the "entry point"
630     // rather than the offset (see format_process_maps at
631     // http://cygwin.com/cgi-bin/cvsweb.cgi/src/winsup/cygwin/fhandler_process.cc?rev=1.89&content-type=text/x-cvsweb-markup&cvsroot=src
    // Offset is always 0 on cygwin: cygwin implements an mmap
633     // by loading the whole file and then calling NtMapViewOfSection.
634     // Cygwin also seems to set its flags kinda randomly; use windows default.
635     char tmpflags[5];
636     if (offset)
637       *offset = 0;
638     strcpy(flags_, "r-xp");
639     if (sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n",
640                start ? start : &tmpstart,
641                end ? end : &tmpend,
642                tmpflags,
643                &tmpoffset,
644                &major, &minor,
645                inode ? inode : &tmpinode, &filename_offset) != 7) continue;
646 #elif defined(__FreeBSD__)
647     // For the format, see http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/fs/procfs/procfs_map.c?rev=1.31&content-type=text/x-cvsweb-markup
648     tmpstart = tmpend = tmpoffset = 0;
649     tmpinode = 0;
650     major = minor = 0;   // can't get this info in freebsd
651     if (inode)
652       *inode = 0;        // nor this
653     if (offset)
654       *offset = 0;       // seems like this should be in there, but maybe not
655     // start end resident privateresident obj(?) prot refcnt shadowcnt
656     // flags copy_on_write needs_copy type filename:
657     // 0x8048000 0x804a000 2 0 0xc104ce70 r-x 1 0 0x0 COW NC vnode /bin/cat
658     if (sscanf(stext_, "0x%" SCNx64 " 0x%" SCNx64 " %*d %*d %*p %3s %*d %*d 0x%*x %*s %*s %*s %n",
659                start ? start : &tmpstart,
660                end ? end : &tmpend,
661                flags_,
662                &filename_offset) != 3) continue;
663 #endif
664 
665     // Depending on the Linux kernel being used, there may or may not be a space
666     // after the inode if there is no filename.  sscanf will in such situations
667     // nondeterministically either fill in filename_offset or not (the results
668     // differ on multiple calls in the same run even with identical arguments).
669     // We don't want to wander off somewhere beyond the end of the string.
670     size_t stext_length = strlen(stext_);
671     if (filename_offset == 0 || filename_offset > stext_length)
672       filename_offset = stext_length;
673 
674     // We found an entry
675     if (flags) *flags = flags_;
676     if (filename) *filename = stext_ + filename_offset;
677     if (dev) *dev = minor | (major << 8);
678 
679     if (using_maps_backing_) {
680       // Extract and parse physical page backing info.
681       char *backing_ptr = stext_ + filename_offset +
682           strlen(stext_+filename_offset);
683 
684       // find the second '('
685       int paren_count = 0;
686       while (--backing_ptr > stext_) {
687         if (*backing_ptr == '(') {
688           ++paren_count;
689           if (paren_count >= 2) {
690             uint64 tmp_file_mapping;
691             uint64 tmp_file_pages;
692             uint64 tmp_anon_mapping;
693             uint64 tmp_anon_pages;
694 
695             sscanf(backing_ptr+1, "F %" SCNx64 " %" SCNd64 ") (A %" SCNx64 " %" SCNd64 ")",
696                    file_mapping ? file_mapping : &tmp_file_mapping,
697                    file_pages ? file_pages : &tmp_file_pages,
698                    anon_mapping ? anon_mapping : &tmp_anon_mapping,
699                    anon_pages ? anon_pages : &tmp_anon_pages);
700             // null terminate the file name (there is a space
701             // before the first (.
702             backing_ptr[-1] = 0;
703             break;
704           }
705         }
706       }
707     }
708 
709     return true;
710   } while (etext_ > ibuf_);
711 #elif defined(__sun__)
712   // This is based on MA_READ == 4, MA_WRITE == 2, MA_EXEC == 1
713   static char kPerms[8][4] = { "---", "--x", "-w-", "-wx",
714                                "r--", "r-x", "rw-", "rwx" };
715   COMPILE_ASSERT(MA_READ == 4, solaris_ma_read_must_equal_4);
716   COMPILE_ASSERT(MA_WRITE == 2, solaris_ma_write_must_equal_2);
717   COMPILE_ASSERT(MA_EXEC == 1, solaris_ma_exec_must_equal_1);
718   Buffer object_path;
719   int nread = 0;            // fill up buffer with text
720   NO_INTR(nread = read(fd_, ibuf_, sizeof(prmap_t)));
721   if (nread == sizeof(prmap_t)) {
722     long inode_from_mapname = 0;
723     prmap_t* mapinfo = reinterpret_cast<prmap_t*>(ibuf_);
724     // Best-effort attempt to get the inode from the filename.  I think the
725     // two middle ints are major and minor device numbers, but I'm not sure.
726     sscanf(mapinfo->pr_mapname, "ufs.%*d.%*d.%ld", &inode_from_mapname);
727 
728     if (pid_ == 0) {
729       CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize,
730                         "/proc/self/path/%s", mapinfo->pr_mapname),
731                Buffer::kBufSize);
732     } else {
733       CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize,
734                         "/proc/%d/path/%s",
735                         static_cast<int>(pid_), mapinfo->pr_mapname),
736                Buffer::kBufSize);
737     }
738     ssize_t len = readlink(object_path.buf_, current_filename_, PATH_MAX);
739     CHECK_LT(len, PATH_MAX);
740     if (len < 0)
741       len = 0;
742     current_filename_[len] = '\0';
743 
744     if (start) *start = mapinfo->pr_vaddr;
745     if (end) *end = mapinfo->pr_vaddr + mapinfo->pr_size;
746     if (flags) *flags = kPerms[mapinfo->pr_mflags & 7];
747     if (offset) *offset = mapinfo->pr_offset;
748     if (inode) *inode = inode_from_mapname;
749     if (filename) *filename = current_filename_;
750     if (file_mapping) *file_mapping = 0;
751     if (file_pages) *file_pages = 0;
752     if (anon_mapping) *anon_mapping = 0;
753     if (anon_pages) *anon_pages = 0;
754     if (dev) *dev = 0;
755     return true;
756   }
757 #elif defined(__MACH__)
758   // We return a separate entry for each segment in the DLL. (TODO(csilvers):
759   // can we do better?)  A DLL ("image") has load-commands, some of which
760   // talk about segment boundaries.
761   // cf image_for_address from http://svn.digium.com/view/asterisk/team/oej/minivoicemail/dlfcn.c?revision=53912
762   for (; current_image_ >= 0; current_image_--) {
763     const mach_header* hdr = _dyld_get_image_header(current_image_);
764     if (!hdr) continue;
765     if (current_load_cmd_ < 0)   // set up for this image
766       current_load_cmd_ = hdr->ncmds;  // again, go from the top down
767 
768     // We start with the next load command (we've already looked at this one).
769     for (current_load_cmd_--; current_load_cmd_ >= 0; current_load_cmd_--) {
770 #ifdef MH_MAGIC_64
771       if (NextExtMachHelper<MH_MAGIC_64, LC_SEGMENT_64,
772                             struct mach_header_64, struct segment_command_64>(
773                                 hdr, current_image_, current_load_cmd_,
774                                 start, end, flags, offset, inode, filename,
775                                 file_mapping, file_pages, anon_mapping,
776                                 anon_pages, dev)) {
777         return true;
778       }
779 #endif
780       if (NextExtMachHelper<MH_MAGIC, LC_SEGMENT,
781                             struct mach_header, struct segment_command>(
782                                 hdr, current_image_, current_load_cmd_,
783                                 start, end, flags, offset, inode, filename,
784                                 file_mapping, file_pages, anon_mapping,
785                                 anon_pages, dev)) {
786         return true;
787       }
788     }
789     // If we get here, no more load_cmd's in this image talk about
790     // segments.  Go on to the next image.
791   }
792 #elif defined(PLATFORM_WINDOWS)
793   static char kDefaultPerms[5] = "r-xp";
794   BOOL ok;
795   if (module_.dwSize == 0) {  // only possible before first call
796     module_.dwSize = sizeof(module_);
797     ok = Module32First(snapshot_, &module_);
798   } else {
799     ok = Module32Next(snapshot_, &module_);
800   }
801   if (ok) {
802     uint64 base_addr = reinterpret_cast<DWORD_PTR>(module_.modBaseAddr);
803     if (start) *start = base_addr;
804     if (end) *end = base_addr + module_.modBaseSize;
805     if (flags) *flags = kDefaultPerms;
806     if (offset) *offset = 0;
807     if (inode) *inode = 0;
808     if (filename) *filename = module_.szExePath;
809     if (file_mapping) *file_mapping = 0;
810     if (file_pages) *file_pages = 0;
811     if (anon_mapping) *anon_mapping = 0;
812     if (anon_pages) *anon_pages = 0;
813     if (dev) *dev = 0;
814     return true;
815   }
816 #endif
817 
818   // We didn't find anything
819   return false;
820 }
821 
FormatLine(char * buffer,int bufsize,uint64 start,uint64 end,const char * flags,uint64 offset,int64 inode,const char * filename,dev_t dev)822 int ProcMapsIterator::FormatLine(char* buffer, int bufsize,
823                                  uint64 start, uint64 end, const char *flags,
824                                  uint64 offset, int64 inode,
825                                  const char *filename, dev_t dev) {
826   // We assume 'flags' looks like 'rwxp' or 'rwx'.
827   char r = (flags && flags[0] == 'r') ? 'r' : '-';
828   char w = (flags && flags[0] && flags[1] == 'w') ? 'w' : '-';
829   char x = (flags && flags[0] && flags[1] && flags[2] == 'x') ? 'x' : '-';
830   // p always seems set on linux, so we set the default to 'p', not '-'
831   char p = (flags && flags[0] && flags[1] && flags[2] && flags[3] != 'p')
832       ? '-' : 'p';
833 
834   const int rc = snprintf(buffer, bufsize,
835                           "%08" PRIx64 "-%08" PRIx64 " %c%c%c%c %08" PRIx64 " %02x:%02x %-11" PRId64 " %s\n",
836                           start, end, r,w,x,p, offset,
837                           static_cast<int>(dev/256), static_cast<int>(dev%256),
838                           inode, filename);
839   return (rc < 0 || rc >= bufsize) ? 0 : rc;
840 }
841 
842 namespace tcmalloc {
843 
844 // Helper to add the list of mapped shared libraries to a profile.
845 // Fill formatted "/proc/self/maps" contents into buffer 'buf' of size 'size'
846 // and return the actual size occupied in 'buf'.  We fill wrote_all to true
847 // if we successfully wrote all proc lines to buf, false else.
848 // We do not provision for 0-terminating 'buf'.
FillProcSelfMaps(char buf[],int size,bool * wrote_all)849 int FillProcSelfMaps(char buf[], int size, bool* wrote_all) {
850   ProcMapsIterator::Buffer iterbuf;
851   ProcMapsIterator it(0, &iterbuf);   // 0 means "current pid"
852 
853   uint64 start, end, offset;
854   int64 inode;
855   char *flags, *filename;
856   int bytes_written = 0;
857   *wrote_all = true;
858   while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) {
859     const int line_length = it.FormatLine(buf + bytes_written,
860                                           size - bytes_written,
861                                           start, end, flags, offset,
862                                           inode, filename, 0);
863     if (line_length == 0)
864       *wrote_all = false;     // failed to write this line out
865     else
866       bytes_written += line_length;
867 
868   }
869   return bytes_written;
870 }
871 
872 // Dump the same data as FillProcSelfMaps reads to fd.
873 // It seems easier to repeat parts of FillProcSelfMaps here than to
874 // reuse it via a call.
DumpProcSelfMaps(RawFD fd)875 void DumpProcSelfMaps(RawFD fd) {
876   ProcMapsIterator::Buffer iterbuf;
877   ProcMapsIterator it(0, &iterbuf);   // 0 means "current pid"
878 
879   uint64 start, end, offset;
880   int64 inode;
881   char *flags, *filename;
882   ProcMapsIterator::Buffer linebuf;
883   while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) {
884     int written = it.FormatLine(linebuf.buf_, sizeof(linebuf.buf_),
885                                 start, end, flags, offset, inode, filename,
886                                 0);
887     RawWrite(fd, linebuf.buf_, written);
888   }
889 }
890 
891 }  // namespace tcmalloc
892