1#!/usr/bin/env python
2# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3#
4# Use of this source code is governed by a BSD-style license
5# that can be found in the LICENSE file in the root of the source
6# tree. An additional intellectual property rights grant can be found
7# in the file PATENTS.  All contributing project authors may
8# be found in the AUTHORS file in the root of the source tree.
9
10# memcheck_analyze.py
11
12''' Given a valgrind XML file, parses errors and uniques them.'''
13
14import gdb_helper
15
16from collections import defaultdict
17import hashlib
18import logging
19import optparse
20import os
21import re
22import subprocess
23import sys
24import time
25from xml.dom.minidom import parse
26from xml.parsers.expat import ExpatError
27
28import common
29
30# Global symbol table (yuck)
31TheAddressTable = None
32
33# These are regexps that define functions (using C++ mangled names)
34# we don't want to see in stack traces while pretty printing
35# or generating suppressions.
36# Just stop printing the stack/suppression frames when the current one
37# matches any of these.
38_BORING_CALLERS = common.BoringCallers(mangled=True, use_re_wildcards=True)
39
40def getTextOf(top_node, name):
41  ''' Returns all text in all DOM nodes with a certain |name| that are children
42  of |top_node|.
43  '''
44
45  text = ""
46  for nodes_named in top_node.getElementsByTagName(name):
47    text += "".join([node.data for node in nodes_named.childNodes
48                     if node.nodeType == node.TEXT_NODE])
49  return text
50
51def getCDATAOf(top_node, name):
52  ''' Returns all CDATA in all DOM nodes with a certain |name| that are children
53  of |top_node|.
54  '''
55
56  text = ""
57  for nodes_named in top_node.getElementsByTagName(name):
58    text += "".join([node.data for node in nodes_named.childNodes
59                     if node.nodeType == node.CDATA_SECTION_NODE])
60  if (text == ""):
61    return None
62  return text
63
64def shortenFilePath(source_dir, directory):
65  '''Returns a string with the string prefix |source_dir| removed from
66  |directory|.'''
67  prefixes_to_cut = ["build/src/", "valgrind/coregrind/", "out/Release/../../"]
68
69  if source_dir:
70    prefixes_to_cut.append(source_dir)
71
72  for p in prefixes_to_cut:
73    index = directory.rfind(p)
74    if index != -1:
75      directory = directory[index + len(p):]
76
77  return directory
78
79# Constants that give real names to the abbreviations in valgrind XML output.
80INSTRUCTION_POINTER = "ip"
81OBJECT_FILE = "obj"
82FUNCTION_NAME = "fn"
83SRC_FILE_DIR = "dir"
84SRC_FILE_NAME = "file"
85SRC_LINE = "line"
86
87def gatherFrames(node, source_dir):
88  frames = []
89  for frame in node.getElementsByTagName("frame"):
90    frame_dict = {
91      INSTRUCTION_POINTER : getTextOf(frame, INSTRUCTION_POINTER),
92      OBJECT_FILE         : getTextOf(frame, OBJECT_FILE),
93      FUNCTION_NAME       : getTextOf(frame, FUNCTION_NAME),
94      SRC_FILE_DIR        : shortenFilePath(
95          source_dir, getTextOf(frame, SRC_FILE_DIR)),
96      SRC_FILE_NAME       : getTextOf(frame, SRC_FILE_NAME),
97      SRC_LINE            : getTextOf(frame, SRC_LINE)
98    }
99
100    # Ignore this frame and all the following if it's a "boring" function.
101    enough_frames = False
102    for regexp in _BORING_CALLERS:
103      if re.match("^%s$" % regexp, frame_dict[FUNCTION_NAME]):
104        enough_frames = True
105        break
106    if enough_frames:
107      break
108
109    frames += [frame_dict]
110
111    global TheAddressTable
112    if TheAddressTable != None and frame_dict[SRC_LINE] == "":
113      # Try using gdb
114      TheAddressTable.Add(frame_dict[OBJECT_FILE],
115                          frame_dict[INSTRUCTION_POINTER])
116  return frames
117
118class ValgrindError:
119  ''' Takes a <DOM Element: error> node and reads all the data from it. A
120  ValgrindError is immutable and is hashed on its pretty printed output.
121  '''
122
123  def __init__(self, source_dir, error_node, commandline, testcase):
124    ''' Copies all the relevant information out of the DOM and into object
125    properties.
126
127    Args:
128      error_node: The <error></error> DOM node we're extracting from.
129      source_dir: Prefix that should be stripped from the <dir> node.
130      commandline: The command that was run under valgrind
131      testcase: The test case name, if known.
132    '''
133
134    # Valgrind errors contain one <what><stack> pair, plus an optional
135    # <auxwhat><stack> pair, plus an optional <origin><what><stack></origin>,
136    # plus (since 3.5.0) a <suppression></suppression> pair.
137    # (Origin is nicely enclosed; too bad the other two aren't.)
138    # The most common way to see all three in one report is
139    # a syscall with a parameter that points to uninitialized memory, e.g.
140    # Format:
141    # <error>
142    #   <unique>0x6d</unique>
143    #   <tid>1</tid>
144    #   <kind>SyscallParam</kind>
145    #   <what>Syscall param write(buf) points to uninitialised byte(s)</what>
146    #   <stack>
147    #     <frame>
148    #     ...
149    #     </frame>
150    #   </stack>
151    #   <auxwhat>Address 0x5c9af4f is 7 bytes inside a block of ...</auxwhat>
152    #   <stack>
153    #     <frame>
154    #     ...
155    #     </frame>
156    #   </stack>
157    #   <origin>
158    #   <what>Uninitialised value was created by a heap allocation</what>
159    #   <stack>
160    #     <frame>
161    #     ...
162    #     </frame>
163    #   </stack>
164    #   </origin>
165    #   <suppression>
166    #     <sname>insert_a_suppression_name_here</sname>
167    #     <skind>Memcheck:Param</skind>
168    #     <skaux>write(buf)</skaux>
169    #     <sframe> <fun>__write_nocancel</fun> </sframe>
170    #     ...
171    #     <sframe> <fun>main</fun> </sframe>
172    #     <rawtext>
173    # <![CDATA[
174    # {
175    #    <insert_a_suppression_name_here>
176    #    Memcheck:Param
177    #    write(buf)
178    #    fun:__write_nocancel
179    #    ...
180    #    fun:main
181    # }
182    # ]]>
183    #     </rawtext>
184    #   </suppression>
185    # </error>
186    #
187    # Each frame looks like this:
188    #  <frame>
189    #    <ip>0x83751BC</ip>
190    #    <obj>/data/dkegel/chrome-build/src/out/Release/base_unittests</obj>
191    #    <fn>_ZN7testing8internal12TestInfoImpl7RunTestEPNS_8TestInfoE</fn>
192    #    <dir>/data/dkegel/chrome-build/src/testing/gtest/src</dir>
193    #    <file>gtest-internal-inl.h</file>
194    #    <line>655</line>
195    #  </frame>
196    # although the dir, file, and line elements are missing if there is
197    # no debug info.
198
199    self._kind = getTextOf(error_node, "kind")
200    self._backtraces = []
201    self._suppression = None
202    self._commandline = commandline
203    self._testcase = testcase
204    self._additional = []
205
206    # Iterate through the nodes, parsing <what|auxwhat><stack> pairs.
207    description = None
208    for node in error_node.childNodes:
209      if node.localName == "what" or node.localName == "auxwhat":
210        description = "".join([n.data for n in node.childNodes
211                              if n.nodeType == n.TEXT_NODE])
212      elif node.localName == "xwhat":
213        description = getTextOf(node, "text")
214      elif node.localName == "stack":
215        assert description
216        self._backtraces.append([description, gatherFrames(node, source_dir)])
217        description = None
218      elif node.localName == "origin":
219        description = getTextOf(node, "what")
220        stack = node.getElementsByTagName("stack")[0]
221        frames = gatherFrames(stack, source_dir)
222        self._backtraces.append([description, frames])
223        description = None
224        stack = None
225        frames = None
226      elif description and node.localName != None:
227        # The lastest description has no stack, e.g. "Address 0x28 is unknown"
228        self._additional.append(description)
229        description = None
230
231      if node.localName == "suppression":
232        self._suppression = getCDATAOf(node, "rawtext");
233
234  def __str__(self):
235    ''' Pretty print the type and backtrace(s) of this specific error,
236        including suppression (which is just a mangled backtrace).'''
237    output = ""
238    output += "\n" # Make sure the ### is at the beginning of line.
239    output += "### BEGIN MEMORY TOOL REPORT (error hash=#%016X#)\n" % \
240        self.ErrorHash()
241    if (self._commandline):
242      output += self._commandline + "\n"
243
244    output += self._kind + "\n"
245    for backtrace in self._backtraces:
246      output += backtrace[0] + "\n"
247      filter = subprocess.Popen("c++filt -n", stdin=subprocess.PIPE,
248                                stdout=subprocess.PIPE,
249                                stderr=subprocess.STDOUT,
250                                shell=True,
251                                close_fds=True)
252      buf = ""
253      for frame in backtrace[1]:
254        buf +=  (frame[FUNCTION_NAME] or frame[INSTRUCTION_POINTER]) + "\n"
255      (stdoutbuf, stderrbuf) = filter.communicate(buf.encode('latin-1'))
256      demangled_names = stdoutbuf.split("\n")
257
258      i = 0
259      for frame in backtrace[1]:
260        output += ("  " + demangled_names[i])
261        i = i + 1
262
263        global TheAddressTable
264        if TheAddressTable != None and frame[SRC_FILE_DIR] == "":
265           # Try using gdb
266           foo = TheAddressTable.GetFileLine(frame[OBJECT_FILE],
267                                             frame[INSTRUCTION_POINTER])
268           if foo[0] != None:
269             output += (" (" + foo[0] + ":" + foo[1] + ")")
270        elif frame[SRC_FILE_DIR] != "":
271          output += (" (" + frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME] +
272                     ":" + frame[SRC_LINE] + ")")
273        else:
274          output += " (" + frame[OBJECT_FILE] + ")"
275        output += "\n"
276
277    for additional in self._additional:
278      output += additional + "\n"
279
280    assert self._suppression != None, "Your Valgrind doesn't generate " \
281                                      "suppressions - is it too old?"
282
283    if self._testcase:
284      output += "The report came from the `%s` test.\n" % self._testcase
285    output += "Suppression (error hash=#%016X#):\n" % self.ErrorHash()
286    output += ("  For more info on using suppressions see "
287               "http://dev.chromium.org/developers/tree-sheriffs/sheriff-details-chromium/memory-sheriff#TOC-Suppressing-memory-reports")
288
289    # Widen suppression slightly to make portable between mac and linux
290    # TODO(timurrrr): Oops, these transformations should happen
291    # BEFORE calculating the hash!
292    supp = self._suppression;
293    supp = supp.replace("fun:_Znwj", "fun:_Znw*")
294    supp = supp.replace("fun:_Znwm", "fun:_Znw*")
295    supp = supp.replace("fun:_Znaj", "fun:_Zna*")
296    supp = supp.replace("fun:_Znam", "fun:_Zna*")
297
298    # Make suppressions even less platform-dependent.
299    for sz in [1, 2, 4, 8]:
300      supp = supp.replace("Memcheck:Addr%d" % sz, "Memcheck:Unaddressable")
301      supp = supp.replace("Memcheck:Value%d" % sz, "Memcheck:Uninitialized")
302    supp = supp.replace("Memcheck:Cond", "Memcheck:Uninitialized")
303
304    # Split into lines so we can enforce length limits
305    supplines = supp.split("\n")
306    supp = None  # to avoid re-use
307
308    # Truncate at line 26 (VG_MAX_SUPP_CALLERS plus 2 for name and type)
309    # or at the first 'boring' caller.
310    # (https://bugs.kde.org/show_bug.cgi?id=199468 proposes raising
311    # VG_MAX_SUPP_CALLERS, but we're probably fine with it as is.)
312    newlen = min(26, len(supplines));
313
314    # Drop boring frames and all the following.
315    enough_frames = False
316    for frameno in range(newlen):
317      for boring_caller in _BORING_CALLERS:
318        if re.match("^ +fun:%s$" % boring_caller, supplines[frameno]):
319          newlen = frameno
320          enough_frames = True
321          break
322      if enough_frames:
323        break
324    if (len(supplines) > newlen):
325      supplines = supplines[0:newlen]
326      supplines.append("}")
327
328    for frame in range(len(supplines)):
329      # Replace the always-changing anonymous namespace prefix with "*".
330      m = re.match("( +fun:)_ZN.*_GLOBAL__N_.*\.cc_" +
331                   "[0-9a-fA-F]{8}_[0-9a-fA-F]{8}(.*)",
332                   supplines[frame])
333      if m:
334        supplines[frame] = "*".join(m.groups())
335
336    output += "\n".join(supplines) + "\n"
337    output += "### END MEMORY TOOL REPORT (error hash=#%016X#)\n" % \
338        self.ErrorHash()
339
340    return output
341
342  def UniqueString(self):
343    ''' String to use for object identity. Don't print this, use str(obj)
344    instead.'''
345    rep = self._kind + " "
346    for backtrace in self._backtraces:
347      for frame in backtrace[1]:
348        rep += frame[FUNCTION_NAME]
349
350        if frame[SRC_FILE_DIR] != "":
351          rep += frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME]
352        else:
353          rep += frame[OBJECT_FILE]
354
355    return rep
356
357  # This is a device-independent hash identifying the suppression.
358  # By printing out this hash we can find duplicate reports between tests and
359  # different shards running on multiple buildbots
360  def ErrorHash(self):
361    return int(hashlib.md5(self.UniqueString()).hexdigest()[:16], 16)
362
363  def __hash__(self):
364    return hash(self.UniqueString())
365  def __eq__(self, rhs):
366    return self.UniqueString() == rhs
367
368def log_is_finished(f, force_finish):
369  f.seek(0)
370  prev_line = ""
371  while True:
372    line = f.readline()
373    if line == "":
374      if not force_finish:
375        return False
376      # Okay, the log is not finished but we can make it up to be parseable:
377      if prev_line.strip() in ["</error>", "</errorcounts>", "</status>"]:
378        f.write("</valgrindoutput>\n")
379        return True
380      return False
381    if '</valgrindoutput>' in line:
382      # Valgrind often has garbage after </valgrindoutput> upon crash.
383      f.truncate()
384      return True
385    prev_line = line
386
387class MemcheckAnalyzer:
388  ''' Given a set of Valgrind XML files, parse all the errors out of them,
389  unique them and output the results.'''
390
391  SANITY_TEST_SUPPRESSIONS = {
392      "Memcheck sanity test 01 (memory leak).": 1,
393      "Memcheck sanity test 02 (malloc/read left).": 1,
394      "Memcheck sanity test 03 (malloc/read right).": 1,
395      "Memcheck sanity test 04 (malloc/write left).": 1,
396      "Memcheck sanity test 05 (malloc/write right).": 1,
397      "Memcheck sanity test 06 (new/read left).": 1,
398      "Memcheck sanity test 07 (new/read right).": 1,
399      "Memcheck sanity test 08 (new/write left).": 1,
400      "Memcheck sanity test 09 (new/write right).": 1,
401      "Memcheck sanity test 10 (write after free).": 1,
402      "Memcheck sanity test 11 (write after delete).": 1,
403      "Memcheck sanity test 12 (array deleted without []).": 1,
404      "Memcheck sanity test 13 (single element deleted with []).": 1,
405      "Memcheck sanity test 14 (malloc/read uninit).": 1,
406      "Memcheck sanity test 15 (new/read uninit).": 1,
407  }
408
409  # Max time to wait for memcheck logs to complete.
410  LOG_COMPLETION_TIMEOUT = 180.0
411
412  def __init__(self, source_dir, show_all_leaks=False, use_gdb=False):
413    '''Create a parser for Memcheck logs.
414
415    Args:
416      source_dir: Path to top of source tree for this build
417      show_all_leaks: Whether to show even less important leaks
418      use_gdb: Whether to use gdb to resolve source filenames and line numbers
419               in the report stacktraces
420    '''
421    self._source_dir = source_dir
422    self._show_all_leaks = show_all_leaks
423    self._use_gdb = use_gdb
424
425    # Contains the set of unique errors
426    self._errors = set()
427
428    # Contains the time when the we started analyzing the first log file.
429    # This variable is used to skip incomplete logs after some timeout.
430    self._analyze_start_time = None
431
432
433  def Report(self, files, testcase, check_sanity=False):
434    '''Reads in a set of files and prints Memcheck report.
435
436    Args:
437      files: A list of filenames.
438      check_sanity: if true, search for SANITY_TEST_SUPPRESSIONS
439    '''
440    # Beyond the detailed errors parsed by ValgrindError above,
441    # the xml file contain records describing suppressions that were used:
442    # <suppcounts>
443    #  <pair>
444    #    <count>28</count>
445    #    <name>pango_font_leak_todo</name>
446    #  </pair>
447    #  <pair>
448    #    <count>378</count>
449    #    <name>bug_13243</name>
450    #  </pair>
451    # </suppcounts
452    # Collect these and print them at the end.
453    #
454    # With our patch for https://bugs.kde.org/show_bug.cgi?id=205000 in,
455    # the file also includes records of the form
456    # <load_obj><obj>/usr/lib/libgcc_s.1.dylib</obj><ip>0x27000</ip></load_obj>
457    # giving the filename and load address of each binary that was mapped
458    # into the process.
459
460    global TheAddressTable
461    if self._use_gdb:
462      TheAddressTable = gdb_helper.AddressTable()
463    else:
464      TheAddressTable = None
465    cur_report_errors = set()
466    suppcounts = defaultdict(int)
467    badfiles = set()
468
469    if self._analyze_start_time == None:
470      self._analyze_start_time = time.time()
471    start_time = self._analyze_start_time
472
473    parse_failed = False
474    for file in files:
475      # Wait up to three minutes for valgrind to finish writing all files,
476      # but after that, just skip incomplete files and warn.
477      f = open(file, "r+")
478      pid = re.match(".*\.([0-9]+)$", file)
479      if pid:
480        pid = pid.groups()[0]
481      found = False
482      running = True
483      firstrun = True
484      skip = False
485      origsize = os.path.getsize(file)
486      while (running and not found and not skip and
487             (firstrun or
488              ((time.time() - start_time) < self.LOG_COMPLETION_TIMEOUT))):
489        firstrun = False
490        f.seek(0)
491        if pid:
492          # Make sure the process is still running so we don't wait for
493          # 3 minutes if it was killed. See http://crbug.com/17453
494          ps_out = subprocess.Popen("ps p %s" % pid, shell=True,
495                                    stdout=subprocess.PIPE).stdout
496          if len(ps_out.readlines()) < 2:
497            running = False
498        else:
499          skip = True
500          running = False
501        found = log_is_finished(f, False)
502        if not running and not found:
503          logging.warn("Valgrind process PID = %s is not running but its "
504                       "XML log has not been finished correctly.\n"
505                       "Make it up by adding some closing tags manually." % pid)
506          found = log_is_finished(f, not running)
507        if running and not found:
508          time.sleep(1)
509      f.close()
510      if not found:
511        badfiles.add(file)
512      else:
513        newsize = os.path.getsize(file)
514        if origsize > newsize+1:
515          logging.warn(str(origsize - newsize) +
516                       " bytes of junk were after </valgrindoutput> in %s!" %
517                       file)
518        try:
519          parsed_file = parse(file);
520        except ExpatError, e:
521          parse_failed = True
522          logging.warn("could not parse %s: %s" % (file, e))
523          lineno = e.lineno - 1
524          context_lines = 5
525          context_start = max(0, lineno - context_lines)
526          context_end = lineno + context_lines + 1
527          context_file = open(file, "r")
528          for i in range(0, context_start):
529            context_file.readline()
530          for i in range(context_start, context_end):
531            context_data = context_file.readline().rstrip()
532            if i != lineno:
533              logging.warn("  %s" % context_data)
534            else:
535              logging.warn("> %s" % context_data)
536          context_file.close()
537          continue
538        if TheAddressTable != None:
539          load_objs = parsed_file.getElementsByTagName("load_obj")
540          for load_obj in load_objs:
541            obj = getTextOf(load_obj, "obj")
542            ip = getTextOf(load_obj, "ip")
543            TheAddressTable.AddBinaryAt(obj, ip)
544
545        commandline = None
546        preamble = parsed_file.getElementsByTagName("preamble")[0];
547        for node in preamble.getElementsByTagName("line"):
548          if node.localName == "line":
549            for x in node.childNodes:
550              if x.nodeType == node.TEXT_NODE and "Command" in x.data:
551                commandline = x.data
552                break
553
554        raw_errors = parsed_file.getElementsByTagName("error")
555        for raw_error in raw_errors:
556          # Ignore "possible" leaks for now by default.
557          if (self._show_all_leaks or
558              getTextOf(raw_error, "kind") != "Leak_PossiblyLost"):
559            error = ValgrindError(self._source_dir,
560                                  raw_error, commandline, testcase)
561            if error not in cur_report_errors:
562              # We haven't seen such errors doing this report yet...
563              if error in self._errors:
564                # ... but we saw it in earlier reports, e.g. previous UI test
565                cur_report_errors.add("This error was already printed in "
566                                      "some other test, see 'hash=#%016X#'" % \
567                                      error.ErrorHash())
568              else:
569                # ... and we haven't seen it in other tests as well
570                self._errors.add(error)
571                cur_report_errors.add(error)
572
573        suppcountlist = parsed_file.getElementsByTagName("suppcounts")
574        if len(suppcountlist) > 0:
575          suppcountlist = suppcountlist[0]
576          for node in suppcountlist.getElementsByTagName("pair"):
577            count = getTextOf(node, "count");
578            name = getTextOf(node, "name");
579            suppcounts[name] += int(count)
580
581    if len(badfiles) > 0:
582      logging.warn("valgrind didn't finish writing %d files?!" % len(badfiles))
583      for file in badfiles:
584        logging.warn("Last 20 lines of %s :" % file)
585        os.system("tail -n 20 '%s' 1>&2" % file)
586
587    if parse_failed:
588      logging.error("FAIL! Couldn't parse Valgrind output file")
589      return -2
590
591    common.PrintUsedSuppressionsList(suppcounts)
592
593    retcode = 0
594    if cur_report_errors:
595      logging.error("FAIL! There were %s errors: " % len(cur_report_errors))
596
597      if TheAddressTable != None:
598        TheAddressTable.ResolveAll()
599
600      for error in cur_report_errors:
601        logging.error(error)
602
603      retcode = -1
604
605    # Report tool's insanity even if there were errors.
606    if check_sanity:
607      remaining_sanity_supp = MemcheckAnalyzer.SANITY_TEST_SUPPRESSIONS
608      for (name, count) in suppcounts.iteritems():
609        # Workaround for http://crbug.com/334074
610        if (name in remaining_sanity_supp and
611            remaining_sanity_supp[name] <= count):
612          del remaining_sanity_supp[name]
613      if remaining_sanity_supp:
614        logging.error("FAIL! Sanity check failed!")
615        logging.info("The following test errors were not handled: ")
616        for (name, count) in remaining_sanity_supp.iteritems():
617          logging.info("  * %dx %s" % (count, name))
618        retcode = -3
619
620    if retcode != 0:
621      return retcode
622
623    logging.info("PASS! No errors found!")
624    return 0
625
626
627def _main():
628  '''For testing only. The MemcheckAnalyzer class should be imported instead.'''
629  parser = optparse.OptionParser("usage: %prog [options] <files to analyze>")
630  parser.add_option("", "--source-dir",
631                    help="path to top of source tree for this build"
632                    "(used to normalize source paths in baseline)")
633
634  (options, args) = parser.parse_args()
635  if len(args) == 0:
636    parser.error("no filename specified")
637  filenames = args
638
639  analyzer = MemcheckAnalyzer(options.source_dir, use_gdb=True)
640  return analyzer.Report(filenames, None)
641
642
643if __name__ == "__main__":
644  sys.exit(_main())
645