1#!/usr/bin/env python
2# Copyright 2018 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Parses a markdown file, extracts documentation for UMA metrics from the doc,
7and writes that into histograms.xml file.
8
9The syntax for the markdown this script processes is as follows:
10  . The first line for each UMA metric should be: '## [metric name]'.
11  . The following lines should include the additional information about the
12    metric, in a markdown list, in '[name]: [value]' format. For example:
13
14    * units: pixels
15    * owners: first@chromium.org, second@example.com
16
17  . The description, and explanation, of the metric should be after an empty
18    line after the list of attributes.
19  . Each UMA metric section should end with a line '---'. If there are non-UMA
20    sections at the beginning of the doc, then the first UMA section should be
    preceded by a '---' line.
22
23A complete example:
24
25=== sample.md
26# A sample markdown document.
27This is a sample markdown. It has some documentation for UMA metrics too.
28
29# Motivation
30The purpose of this sample doc is to be a guide for writing such docs.
31
32---
33## ExampleMetric.First
34* units: smiles
35* owners: firstowner@chromium.org, second@example.org
36* os: windows, mac
37* added: 2018-03-01
38* expires: 2023-01-01
39
40ExampleMetric.First measures the first example.
41---
42## ExampleMetric.Second
* units: happiness
* owners: thirdowner@chromium.org
44
45This measures the second example.
46
47"""
48
49import datetime
50import os
51import re
52import sys
53import time
54import xml.dom.minidom
55
56sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
57import path_util
58
59sys.path.append(os.path.join(os.path.dirname(__file__), 'histograms'))
60import pretty_print
61
# Attribute names accepted in a metric's '* name: value' list. IsTagKnown()
# tests membership in this list.
SupportedTags = [
    'added',
    'expires',
    'enum',
    'os',
    'owners',
    'tags',
    'units',
]
71
def IsTagKnown(tag):
  """Returns True if |tag| is one of the names listed in SupportedTags."""
  is_known = tag in SupportedTags
  return is_known
74
75
def IsTagValid(tag, value):
  """Returns whether |value| is an acceptable value for the known tag |tag|.

  Only 'added' and 'expires' are checked. Their value must be either a
  milestone ('M' followed by two or three digits, e.g. 'M65') or a real
  calendar date written as YYYY-MM-DD. Every other known tag accepts any
  value.

  Args:
    tag: The attribute name. Must satisfy IsTagKnown().
    value: The attribute value, as a string.

  Returns:
    True if the value is valid for the tag, False otherwise.
  """
  assert IsTagKnown(tag)
  if tag not in ('added', 'expires'):
    return True
  if re.match('^M[0-9]{2,3}$', value):
    return True
  date = re.match('^([0-9]{4})-([0-9]{2})-([0-9]{2})$', value)
  if not date:
    return False
  try:
    datetime.date(*[int(part) for part in date.groups()])
  except ValueError:
    # Well-formed but impossible date (e.g. month 13, February 30, year 0).
    # Report it as invalid instead of letting the exception escape.
    return False
  return True
85
86
class Trace(object):
  """Context manager that reports a step, and how long it took, on stdout.

  On entry it writes '<msg> ...' without a newline and flushes. On exit it
  appends ' Done (<seconds> sec)' and a newline; this happens whether or not
  the block raised. Exceptions raised inside the block are not suppressed.
  """

  def __init__(self, msg):
    self.msg_ = msg
    self.start_ = None

  def __enter__(self):
    self.start_ = time.time()
    sys.stdout.write('%s ...' % (self.msg_))
    sys.stdout.flush()
    # Return the instance so 'with Trace(...) as t' binds the tracer rather
    # than None.
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    sys.stdout.write(' Done (%.3f sec)\n' % (time.time() - self.start_))
    sys.stdout.flush()
99
100
def GetMetricsFromMdFile(mdfile):
  """Returns a list of metrics parsed from the markdown file.

  See the top of this file for documentation on the markdown format. The
  document is split into sections on lines of three or more dashes. A section
  is treated as a metric only if its first line is a '##' (or deeper) header;
  all other sections, including empty ones, are skipped.

  Args:
    mdfile: Path of the markdown file to parse.

  Returns:
    A list of dicts, one per metric, in document order. Each dict has a
    'name' key (the header text), a 'summary' key (the text after the
    attribute list), and one key per attribute listed for the metric.

  Raises:
    AssertionError: If a metric section has a malformed attribute line, an
        unknown tag, an invalid tag value, no summary, no 'owners', or
        neither 'enum' nor 'units'.
  """
  with open(mdfile) as f:
    raw_md = f.read()

  # Validation failures are raised explicitly, rather than with 'assert', so
  # that they still fire under 'python -O'. The exception type is unchanged.
  metrics = []
  tag_pattern = re.compile(r'^\* ([^:]*): (.*)$')
  for section in re.split('\n---+\n', raw_md):
    lines = section.strip().split('\n')
    # The first line should have the header, containing the name of the
    # metric. Skip anything else (an empty section yields a single empty
    # line), but keep scanning: a blank section must not hide the metrics
    # that follow it.
    header_match = re.match('^##+ ', lines[0])
    if not header_match:
      continue
    name = lines[0][header_match.end():]
    metric = {'name': name}

    # Attribute lines run from just after the header up to the first empty
    # line. The summary is everything after that empty line.
    summary_start = None
    for index, line in enumerate(lines[1:], 1):
      if not line:
        summary_start = index + 1
        break
      match = tag_pattern.match(line)
      if not match:
        raise AssertionError(
            'Malformed attribute line "%s" for "%s".' % (line, name))
      tag, value = match.groups()
      if not IsTagKnown(tag):
        raise AssertionError('Unknown tag: "%s".' % tag)
      if not IsTagValid(tag, value):
        raise AssertionError(
            'Invalid value "%s" for tag "%s".' % (value, tag))
      metric[tag] = value

    # No empty line was found, so there is no summary. The section was
    # stripped, so an empty line is always followed by at least one line.
    if summary_start is None:
      raise AssertionError('No summary found for "%s"' % name)
    metric['summary'] = '\n'.join(lines[summary_start:])

    if 'owners' not in metric:
      raise AssertionError('Must have owners for "%s"' % name)
    if 'enum' not in metric and 'units' not in metric:
      raise AssertionError(
          'Metric "%s" must have a unit listed in "enum" or "units".' % name)
    metrics.append(metric)
  return metrics
135
136
def CreateNode(tree, tag, text):
  """Returns a new |tag| element, created by |tree|, whose only child is a
  text node holding |text|. The element is not attached to the document.
  """
  text_node = tree.createTextNode(text)
  element = tree.createElement(tag)
  element.appendChild(text_node)
  return element
141
142
def main():
  """Adds the metrics documented in a markdown file to a histograms xml file.

  Reads its arguments from sys.argv:
    argv[1]: The path to the md file.
    argv[2]: The path of the xml file to update. It is joined onto
        tools/metrics/histograms and resolved with path_util.GetInputFile.

  The xml is parsed, one <histogram> element is appended to its <histograms>
  element per metric found in the md file, and the pretty-printed result is
  written back over the same xml file. Exits with status 1 on a usage error
  or when the xml does not contain exactly one <histograms> element.
  """
  if len(sys.argv) != 3:
    sys.stderr.write('Usage: %s <path-to-md-file> <path-to-histograms-file>\n' %
                     (sys.argv[0]))
    sys.exit(1)

  md_path = sys.argv[1]
  rel_path = sys.argv[2]
  with Trace('Reading histograms.xml'):
    xml_path = path_util.GetInputFile(
        os.path.join('tools', 'metrics', 'histograms', rel_path))
    with open(xml_path, 'rb') as f:
      raw_xml = f.read()

  with Trace('Parsing xml'):
    tree = xml.dom.minidom.parseString(raw_xml)
    histograms = tree.getElementsByTagName('histograms')
    if histograms.length != 1:
      sys.stderr.write('histograms.xml should have exactly one "histograms" '
                       'section.\n')
      sys.exit(1)
    histograms = histograms[0]

  with Trace('Parsing md file %s' % (md_path)):
    metrics = GetMetricsFromMdFile(md_path)

  with Trace('Adding parsed metrics'):
    for metric in metrics:
      node = tree.createElement('histogram')
      node.setAttribute('name', metric['name'])
      # 'units' takes precedence when a metric lists both.
      if 'units' in metric:
        node.setAttribute('units', metric['units'])
      elif 'enum' in metric:
        node.setAttribute('enum', metric['enum'])
      # Owners are written as 'a@x.org, b@y.org'. Strip the whitespace
      # around each address and skip empty entries (e.g. a trailing comma)
      # so the <owner> elements hold clean addresses.
      for owner in metric['owners'].split(','):
        owner = owner.strip()
        if owner:
          node.appendChild(CreateNode(tree, 'owner', owner))
      node.appendChild(CreateNode(tree, 'summary', metric['summary']))
      # TODO(sad): This always appends the metric to the list. This should
      # also update if there is an already existing metric, instead of adding a
      # new one.
      histograms.appendChild(node)

  with Trace('Pretty printing into histograms.xml'):
    new_xml = pretty_print.PrettyPrintHistogramsTree(tree)
    with open(xml_path, 'wb') as f:
      f.write(new_xml)
193
194
195if __name__ == '__main__':
196  main()
197