1 // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
2 // Copyright (c) 2009, Google Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // ---
32 // Author: Craig Silverstein
33 //
34 // This forks out to pprof to do the actual symbolizing.  We might
35 // be better off writing our own in C++.
36 
37 #include "config.h"
38 #include "symbolize.h"
39 #include <stdlib.h>
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>   // for write()
42 #endif
43 #ifdef HAVE_SYS_SOCKET_H
44 #include <sys/socket.h>   // for socketpair() -- needed by Symbolize
45 #endif
46 #ifdef HAVE_SYS_WAIT_H
47 #include <sys/wait.h>   // for wait() -- needed by Symbolize
48 #endif
49 #ifdef HAVE_POLL_H
50 #include <poll.h>
51 #endif
52 #ifdef __MACH__
53 #include <mach-o/dyld.h>   // for GetProgramInvocationName()
54 #include <limits.h>        // for PATH_MAX
55 #endif
56 #if defined(__CYGWIN__) || defined(__CYGWIN32__)
57 #include <io.h>            // for get_osfhandle()
58 #endif
59 #include <string>
60 #include "base/commandlineflags.h"
61 #include "base/logging.h"
62 #include "base/sysinfo.h"
63 #if defined(__FreeBSD__)
64 #include <sys/sysctl.h>
65 #endif
66 
67 using std::string;
68 using tcmalloc::DumpProcSelfMaps;   // from sysinfo.h
69 
70 
71 DEFINE_string(symbolize_pprof,
72               EnvToString("PPROF_PATH", "pprof"),
73               "Path to pprof to call for reporting function names.");
74 
// FLAGS_symbolize_pprof may be referenced after destructors are
// called (since that's when leak-checking is done), so we make
// a more-permanent copy that won't ever get destroyed.
78 static string* g_pprof_path = new string(FLAGS_symbolize_pprof);
79 
// Looks up the name/path this program was invoked with, using whichever
// facility the current platform offers.  Yields NULL when no supported
// facility exists on this OS, or when the platform lookup itself fails.
// Keeping the result in static storage is acceptable because this routine
// is not called from a thread.
static const char* GetProgramInvocationName() {
#if defined(HAVE_PROGRAM_INVOCATION_NAME)
  // The C library already exports the name; only its declared type differs.
# ifdef __UCLIBC__
  extern const char* program_invocation_name;  // uclibc's declaration
# else
  extern char* program_invocation_name;  // declaration used everywhere else
# endif
  return program_invocation_name;
#elif defined(__MACH__)
  // A fixed static buffer is used instead of heap memory, since this may
  // run while memory is corrupted.
  static char exe_path[PATH_MAX];
  if (exe_path[0] != '\0')
    return exe_path;  // an earlier call already filled the buffer
  uint32_t capacity = sizeof(exe_path);
  if (_NSGetExecutablePath(exe_path, &capacity) != 0)
    return NULL;
  return exe_path;
#elif defined(__FreeBSD__)
  static char exe_path[PATH_MAX];
  static const int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
  size_t capacity = sizeof(exe_path);
  // sysctl() reports success with 0; anything else means no usable path.
  if (sysctl(mib, 4, exe_path, &capacity, NULL, 0) != 0)
    return NULL;
  return exe_path;
#else
  return NULL;   // figure out a way to get argv[0]
#endif
}
111 
112 // Prints an error message when you can't run Symbolize().
PrintError(const char * reason)113 static void PrintError(const char* reason) {
114   RAW_LOG(ERROR,
115           "*** WARNING: Cannot convert addresses to symbols in output below.\n"
116           "*** Reason: %s\n"
117           "*** If you cannot fix this, try running pprof directly.\n",
118           reason);
119 }
120 
Add(const void * addr)121 void SymbolTable::Add(const void* addr) {
122   symbolization_table_[addr] = "";
123 }
124 
GetSymbol(const void * addr)125 const char* SymbolTable::GetSymbol(const void* addr) {
126   return symbolization_table_[addr];
127 }
128 
129 // Updates symbolization_table with the pointers to symbol names corresponding
130 // to its keys. The symbol names are stored in out, which is allocated and
131 // freed by the caller of this routine.
132 // Note that the forking/etc is not thread-safe or re-entrant.  That's
133 // ok for the purpose we need -- reporting leaks detected by heap-checker
134 // -- but be careful if you decide to use this routine for other purposes.
135 // Returns number of symbols read on error.  If can't symbolize, returns 0
136 // and emits an error message about why.
Symbolize()137 int SymbolTable::Symbolize() {
138 #if !defined(HAVE_UNISTD_H)  || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H)
139   PrintError("Perftools does not know how to call a sub-process on this O/S");
140   return 0;
141 #else
142   const char* argv0 = GetProgramInvocationName();
143   if (argv0 == NULL) {  // can't call symbolize if we can't figure out our name
144     PrintError("Cannot figure out the name of this executable (argv0)");
145     return 0;
146   }
147   if (access(g_pprof_path->c_str(), R_OK) != 0) {
148     PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)");
149     return 0;
150   }
151 
152   // All this work is to do two-way communication.  ugh.
153   int *child_in = NULL;   // file descriptors
154   int *child_out = NULL;  // for now, we don't worry about child_err
155   int child_fds[5][2];    // socketpair may be called up to five times below
156 
157   // The client program may close its stdin and/or stdout and/or stderr
158   // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
159   // In this case the communication between the forked processes may be broken
160   // if either the parent or the child tries to close or duplicate these
161   // descriptors. The loop below produces two pairs of file descriptors, each
162   // greater than 2 (stderr).
163   for (int i = 0; i < 5; i++) {
164     if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) {
165       for (int j = 0; j < i; j++) {
166         close(child_fds[j][0]);
167         close(child_fds[j][1]);
168         PrintError("Cannot create a socket pair");
169       }
170       return 0;
171     } else {
172       if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) {
173         if (child_in == NULL) {
174           child_in = child_fds[i];
175         } else {
176           child_out = child_fds[i];
177           for (int j = 0; j < i; j++) {
178             if (child_fds[j] == child_in) continue;
179             close(child_fds[j][0]);
180             close(child_fds[j][1]);
181           }
182           break;
183         }
184       }
185     }
186   }
187 
188   switch (fork()) {
189     case -1: {  // error
190       close(child_in[0]);
191       close(child_in[1]);
192       close(child_out[0]);
193       close(child_out[1]);
194       PrintError("Unknown error calling fork()");
195       return 0;
196     }
197     case 0: {  // child
198       close(child_in[1]);   // child uses the 0's, parent uses the 1's
199       close(child_out[1]);  // child uses the 0's, parent uses the 1's
200       close(0);
201       close(1);
202       if (dup2(child_in[0], 0) == -1) _exit(1);
203       if (dup2(child_out[0], 1) == -1) _exit(2);
204       // Unset vars that might cause trouble when we fork
205       unsetenv("CPUPROFILE");
206       unsetenv("HEAPPROFILE");
207       unsetenv("HEAPCHECK");
208       unsetenv("PERFTOOLS_VERBOSE");
209       execlp(g_pprof_path->c_str(), g_pprof_path->c_str(),
210              "--symbols", argv0, NULL);
211       _exit(3);  // if execvp fails, it's bad news for us
212     }
213     default: {  // parent
214       close(child_in[0]);   // child uses the 0's, parent uses the 1's
215       close(child_out[0]);  // child uses the 0's, parent uses the 1's
216 #ifdef HAVE_POLL_H
217       // Waiting for 1ms seems to give the OS time to notice any errors.
218       poll(0, 0, 1);
219       // For maximum safety, we check to make sure the execlp
220       // succeeded before trying to write.  (Otherwise we'll get a
221       // SIGPIPE.)  For systems without poll.h, we'll just skip this
222       // check, and trust that the user set PPROF_PATH correctly!
223       struct pollfd pfd = { child_in[1], POLLOUT, 0 };
224       if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) ||
225           (pfd.revents & (POLLHUP|POLLERR))) {
226         PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)");
227         return 0;
228       }
229 #endif
230 #if defined(__CYGWIN__) || defined(__CYGWIN32__)
231       // On cygwin, DumpProcSelfMaps() takes a HANDLE, not an fd.  Convert.
232       const HANDLE symbols_handle = (HANDLE) get_osfhandle(child_in[1]);
233       DumpProcSelfMaps(symbols_handle);
234 #else
235       DumpProcSelfMaps(child_in[1]);  // what pprof expects on stdin
236 #endif
237 
238       // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each
239       // address to feed to pprof.
240       const int kOutBufSize = 24 * symbolization_table_.size();
241       char *pprof_buffer = new char[kOutBufSize];
242       int written = 0;
243       for (SymbolMap::const_iterator iter = symbolization_table_.begin();
244            iter != symbolization_table_.end(); ++iter) {
245         written += snprintf(pprof_buffer + written, kOutBufSize - written,
246                  // pprof expects format to be 0xXXXXXX
247                  "0x%" PRIxPTR "\n", reinterpret_cast<uintptr_t>(iter->first));
248       }
249       write(child_in[1], pprof_buffer, strlen(pprof_buffer));
250       close(child_in[1]);             // that's all we need to write
251       delete[] pprof_buffer;
252 
253       const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size();
254       int total_bytes_read = 0;
255       delete[] symbol_buffer_;
256       symbol_buffer_ = new char[kSymbolBufferSize];
257       memset(symbol_buffer_, '\0', kSymbolBufferSize);
258       while (1) {
259         int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read,
260                               kSymbolBufferSize - total_bytes_read);
261         if (bytes_read < 0) {
262           close(child_out[1]);
263           PrintError("Cannot read data from pprof");
264           return 0;
265         } else if (bytes_read == 0) {
266           close(child_out[1]);
267           wait(NULL);
268           break;
269         } else {
270           total_bytes_read += bytes_read;
271         }
272       }
273       // We have successfully read the output of pprof into out.  Make sure
274       // the last symbol is full (we can tell because it ends with a \n).
275       if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n')
276         return 0;
277       // make the symbolization_table_ values point to the output vector
278       SymbolMap::iterator fill = symbolization_table_.begin();
279       int num_symbols = 0;
280       const char *current_name = symbol_buffer_;
281       for (int i = 0; i < total_bytes_read; i++) {
282         if (symbol_buffer_[i] == '\n') {
283           fill->second = current_name;
284           symbol_buffer_[i] = '\0';
285           current_name = symbol_buffer_ + i + 1;
286           fill++;
287           num_symbols++;
288         }
289       }
290       return num_symbols;
291     }
292   }
293   PrintError("Unkown error (should never occur!)");
294   return 0;  // shouldn't be reachable
295 #endif
296 }
297