awsy/awsy/parse_about_memory.py

#!/usr/bin/env python

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


# Firefox about:memory log parser.

from __future__ import absolute_import, print_function

import argparse
from collections import defaultdict
import gzip
import json

# Numeric value of KIND_HEAP as declared in nsIMemoryReporter.idl.
KIND_HEAP = 1


def path_total(data, path):
    """
    Sums the reported amounts for a data point path, grouped by process.

    A path that ends with '/' is treated as a prefix, so every report whose
    path starts with it is counted. Any other path must equal a report's
    path exactly.

    Two paths get extra handling because about:memory derives the
    heap-unclassified node instead of reading it from a report:

    - 'explicit/': the derived heap-unclassified value is added on top of
      the summed reports.
    - 'explicit/heap-unclassified': the derived value replaces the total.

    In both cases only processes that have at least one KIND_HEAP report
    under 'explicit/' are adjusted.

    :param data: Parsed memory report. Its "reports" entry is iterated and
     each report is read through the "path", "process" and "amount" keys
     (plus "kind" for the two special paths).
    :param path: Data point path, ie: 'explicit/' or 'resident'.
    :returns: A defaultdict(int) mapping process name to the summed amount.
    :raises AssertionError: If heap-unclassified has to be derived for a
     process that has no 'heap-allocated' report, or if the derived value
     is negative.
    """
    is_prefix = path.endswith('/')
    wants_explicit_tree = path == "explicit/"
    wants_unclassified = path == "explicit/heap-unclassified"
    track_heap = wants_explicit_tree or wants_unclassified

    totals = defaultdict(int)

    # Per-process inputs for deriving heap-unclassified: the heap memory the
    # explicit/ reporters account for, and what the allocator says is in use.
    heap_reported = defaultdict(int)
    heap_allocated = defaultdict(int)

    for entry in data["reports"]:
        if track_heap:
            if entry["kind"] == KIND_HEAP and entry["path"].startswith("explicit/"):
                heap_reported[entry["process"]] += entry["amount"]
            elif entry["path"] == "heap-allocated":
                heap_allocated[entry["process"]] = entry["amount"]

        entry_path = entry["path"]
        if is_prefix:
            matched = entry_path.startswith(path)
        else:
            matched = entry_path == path

        if matched:
            totals[entry["process"]] += entry["amount"]

    def derive_unclassified(proc):
        """
        Returns the allocator's heap-allocated figure for the process minus
        the heap memory reported under the explicit/ tree.
        """
        # Every memory report is expected to carry heap-allocated; treat a
        # missing entry as a hard failure.
        assert proc in heap_allocated

        remainder = heap_allocated[proc] - heap_reported[proc]

        # A misbehaving reporter can over-report and push this below zero.
        assert remainder >= 0, "heap-unclassified was negative: %d" % remainder

        return remainder

    if wants_explicit_tree:
        # The explicit/ tree shown by about:memory includes the generated
        # heap-unclassified node, so fold it into each total.
        for proc in heap_reported:
            totals[proc] += derive_unclassified(proc)
    elif wants_unclassified:
        # heap-unclassified is never reported directly; the derived value is
        # the answer.
        for proc in heap_reported:
            totals[proc] = derive_unclassified(proc)

    return totals


def calculate_memory_report_values(memory_report_path, data_point_path,
                                   process_name=None):
    """
    Opens the given memory report file and calculates the value for the given
    data point.

    The file is first read as plain JSON; if that fails with a ValueError it
    is re-read as gzip-compressed JSON.

    :param memory_report_path: Path to the memory report file to parse.
    :param data_point_path: Path of the data point to calculate in the memory
     report, ie: 'explicit/heap-unclassified'.
    :param process_name: Name of process to limit reports to. ie 'Main'. The
     match is a substring test against each process name; a falsy value
     disables filtering.
    :returns: The per-process totals produced by path_total(), with
     non-matching processes removed when process_name is given.
    """
    try:
        with open(memory_report_path) as f:
            data = json.load(f)
    except ValueError:
        # Not readable as plain JSON text; retry assuming the file is gzipped.
        with gzip.open(memory_report_path, 'rb') as f:
            data = json.load(f)

    totals = path_total(data, data_point_path)

    # If a process name is provided, restrict output to processes matching
    # that name.
    if process_name:
        # Iterate over a snapshot of the keys: on Python 3, keys() is a live
        # view and deleting while iterating it raises RuntimeError.
        for k in list(totals):
            if process_name not in k:
                del totals[k]

    return totals


# Command-line entry point: prints one tab-separated row of process names
# followed by one tab-separated row of the matching totals.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
            description='Extract data points from about:memory reports')
    parser.add_argument('report', action='store',
                        help='Path to a memory report file.')
    parser.add_argument('prefix', action='store',
                        help='Prefix of data point to measure. '
                        'If the prefix does not end in a \'/\' '
                        'then an exact match is made.')
    parser.add_argument('--proc-filter', action='store', default=None,
                        help='Process name filter. '
                             'If not provided all processes will be included.')
    parser.add_argument('--mebi', action='store_true',
                        help='Output values as mebibytes (instead of bytes)'
                        ' to match about:memory.')

    args = parser.parse_args()
    totals = calculate_memory_report_values(
                    args.report, args.prefix, args.proc_filter)

    # Largest totals first; ties are ordered by process name.
    sorted_totals = sorted(totals.items(), key=lambda item: (-item[1], item[0]))

    # print() is a function in this module (print_function is imported from
    # __future__), so a trailing comma does not suppress the newline. Pass
    # end="" to keep each row on a single line. Zero totals are skipped.
    for (k, v) in sorted_totals:
        if v:
            print("{0}\t".format(k), end="")
    print("")

    bytes_per_mebibyte = 1024.0 * 1024.0
    for (k, v) in sorted_totals:
        if v:
            if args.mebi:
                print("{0:.2f} MiB".format(v / bytes_per_mebibyte), end="")
            else:
                print("{0} bytes".format(v), end="")
            print("\t", end="")
    print("")