#!/usr/bin/env python
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Parses a markdown file, extracts documentation for UMA metrics from the doc,
and writes that into histograms.xml file.

The syntax for the markdown this script processes is as follows:
  . The first line for each UMA metric should be: '## [metric name]'.
  . The following lines should include the additional information about the
    metric, in a markdown list, in '[name]: [value]' format. For example:

    * units: pixels
    * owners: first@chromium.org, second@example.com

  . The description, and explanation, of the metric should be after an empty
    line after the list of attributes.
  . Each UMA metric section should end with a line '---'. If there are non-UMA
    sections at the beginning of the doc, then the first UMA section should be
    preceded by a '---' line.
  . Every metric must list its 'owners', and either 'units' or 'enum'.

A complete example:

=== sample.md
# A sample markdown document.
This is a sample markdown. It has some documentation for UMA metrics too.

# Motivation
The purpose of this sample doc is to be a guide for writing such docs.

---
## ExampleMetric.First
* units: smiles
* owners: firstowner@chromium.org, second@example.org
* os: windows, mac
* added: 2018-03-01
* expires: 2023-01-01

ExampleMetric.First measures the first example.
---
## ExampleMetric.Second
* units: happiness
* owners: firstowner@chromium.org

This measures the second example.

"""

import datetime
import os
import re
import sys
import time
import xml.dom.minidom

sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
import path_util

sys.path.append(os.path.join(os.path.dirname(__file__), 'histograms'))
import pretty_print

# The attribute names that may appear in the markdown list of a metric.
SupportedTags = [
  "added",
  "expires",
  "enum",
  "os",
  "owners",
  "tags",
  "units",
]

# A milestone, e.g. 'M72' or 'M100'.
_MILESTONE_PATTERN = re.compile(r'^M[0-9]{2,3}$')
# A date in YYYY-MM-DD form; the groups are year, month and day.
_DATE_PATTERN = re.compile(r'^([0-9]{4})-([0-9]{2})-([0-9]{2})$')
# A line made only of three or more dashes, which separates sections.
_SECTION_SEPARATOR = re.compile(r'\n---+\n')
# The markdown header ('## ', '### ', ...) that introduces a metric name.
_HEADER_PATTERN = re.compile(r'^##+ ')
# A '* [name]: [value]' list item; the groups are the name and the value.
_TAG_PATTERN = re.compile(r'^\* ([^:]*): (.*)$')


class MarkdownFormatError(AssertionError):
  """Raised when a metric section does not follow the documented format.

  These checks used to be `assert` statements, which are skipped when python
  runs with -O. Deriving from AssertionError keeps any code that caught the
  old assertion failures working.
  """


def IsTagKnown(tag):
  """Returns whether |tag| is one of the names listed in SupportedTags."""
  return tag in SupportedTags


def IsTagValid(tag, value):
  """Returns True if |value| is an acceptable value for the known tag |tag|.

  Only 'added' and 'expires' are validated: they must be either a milestone
  ('M' followed by two or three digits) or a real calendar date written as
  YYYY-MM-DD. Every value is accepted for the other tags.
  """
  assert IsTagKnown(tag)
  if tag not in ('added', 'expires'):
    return True
  if _MILESTONE_PATTERN.match(value):
    return True
  date = _DATE_PATTERN.match(value)
  if not date:
    return False
  try:
    # datetime.date() rejects impossible dates such as 2018-13-01.
    datetime.date(*(int(part) for part in date.groups()))
  except ValueError:
    return False
  return True


class Trace(object):
  """Context manager that prints a message and how long its body took."""

  def __init__(self, msg):
    self.msg_ = msg
    self.start_ = None

  def __enter__(self):
    self.start_ = time.time()
    sys.stdout.write('%s ...' % (self.msg_))
    # The message has no newline, so flush to show it before the work starts.
    sys.stdout.flush()
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    # Do not claim success when the body raised (this includes sys.exit()).
    status = 'Done' if exc_type is None else 'Failed'
    sys.stdout.write(' %s (%.3f sec)\n' % (status, time.time() - self.start_))


def _ParseMetricSection(lines):
  """Parses the lines of one markdown section into a metric dictionary.

  Returns None if the first line is not a '## [metric name]' header, i.e. the
  section does not document a metric. Raises MarkdownFormatError if the section
  has a header but the rest of it is malformed.
  """
  header_match = _HEADER_PATTERN.match(lines[0])
  if not header_match:
    return None
  metric = {'name': lines[0][header_match.end():].strip()}

  # The attribute list runs from the second line up to the first blank line.
  # The summary is everything after that blank line.
  summary_start = None
  for index, line in enumerate(lines[1:], 1):
    if not line.strip():
      summary_start = index + 1
      break
    match = _TAG_PATTERN.match(line)
    if not match:
      raise MarkdownFormatError(
          'Expected "* [name]: [value]" but found "%s" in "%s".' %
          (line, metric['name']))
    tag, value = match.groups()
    if not IsTagKnown(tag):
      raise MarkdownFormatError('Unknown tag: "%s".' % tag)
    if not IsTagValid(tag, value):
      raise MarkdownFormatError(
          'Invalid value "%s" for tag "%s".' % (value, tag))
    metric[tag] = value

  if summary_start is None or summary_start >= len(lines):
    raise MarkdownFormatError('No summary found for "%s"' % metric['name'])
  metric['summary'] = '\n'.join(lines[summary_start:])

  if 'owners' not in metric:
    raise MarkdownFormatError('Must have owners for "%s"' % metric['name'])
  if 'enum' not in metric and 'units' not in metric:
    raise MarkdownFormatError(
        'Metric "%s" must have a unit listed in "enum" or "units".' %
        metric['name'])
  return metric


def GetMetricsFromMdFile(mdfile):
  """Returns an array of metrics parsed from the markdown file. See the top of
  the file for documentation on the format of the markdown file.

  Each metric is a dictionary with the keys 'name' and 'summary', plus one key
  for every tag listed for the metric. Sections that are empty or that do not
  start with a metric header are skipped. Raises MarkdownFormatError if a
  metric section is malformed.
  """
  with open(mdfile) as f:
    raw_md = f.read()
  metrics = []
  for section in _SECTION_SEPARATOR.split(raw_md):
    section = section.strip()
    if not section:
      # An empty section must not hide the metrics that come after it.
      continue
    metric = _ParseMetricSection(section.split('\n'))
    if metric is not None:
      metrics.append(metric)
  return metrics


def CreateNode(tree, tag, text):
  """Returns a new |tag| element of |tree| that contains only |text|."""
  node = tree.createElement(tag)
  node.appendChild(tree.createTextNode(text))
  return node


def main():
  """Appends the metrics documented in a markdown file to an xml file.

  argv[1]: The path to the md file.
  argv[2]: The path of the xml file to be updated, relative to
           tools/metrics/histograms.
  """
  if len(sys.argv) != 3:
    sys.stderr.write('Usage: %s <path-to-md-file> <path-to-histograms-file>\n' %
                     (sys.argv[0]))
    sys.exit(1)

  md_path, rel_path = sys.argv[1:]
  with Trace('Reading histograms.xml'):
    xml_path = path_util.GetInputFile(
        os.path.join('tools', 'metrics', 'histograms', rel_path))
    with open(xml_path, 'rb') as f:
      raw_xml = f.read()

  with Trace('Parsing xml'):
    tree = xml.dom.minidom.parseString(raw_xml)
    histograms = tree.getElementsByTagName('histograms')
    if histograms.length != 1:
      sys.stderr.write('histograms.xml should have exactly one "histograms" '
                       'section.\n')
      sys.exit(1)
    histograms = histograms[0]

  with Trace('Parsing md file %s' % (md_path)):
    metrics = GetMetricsFromMdFile(md_path)

  with Trace('Adding parsed metrics'):
    for metric in metrics:
      node = tree.createElement('histogram')
      node.setAttribute('name', metric['name'])
      if 'units' in metric:
        node.setAttribute('units', metric['units'])
      elif 'enum' in metric:
        node.setAttribute('enum', metric['enum'])
      # Owners are separated by ', ' in the markdown, so drop the padding.
      for owner in metric['owners'].split(','):
        owner = owner.strip()
        if owner:
          node.appendChild(CreateNode(tree, 'owner', owner))
      node.appendChild(CreateNode(tree, 'summary', metric['summary']))
      # TODO(sad): This always appends the metric to the list. This should
      # also update if there is an already existing metric, instead of adding a
      # new one.
      histograms.appendChild(node)

  with Trace('Pretty printing into histograms.xml'):
    new_xml = pretty_print.PrettyPrintHistogramsTree(tree)
    with open(xml_path, 'wb') as f:
      f.write(new_xml)


if __name__ == '__main__':
  main()