# Copyright 2009 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Report generation class."""

import csv
import datetime
import operator
import os.path
import platform

# external dependencies (from third_party)
import jinja2
import simplejson

import addr_util
import charts
import nameserver
import nameserver_list
import url_map
import util

# Only bother showing a percentage if we have this many tests.
MIN_RELEVANT_COUNT = 50


class ReportGenerator(object):
    """Generate reports - ASCII, HTML, etc."""

    def __init__(self, config, nameservers, results, index=None, geodata=None,
                 status_callback=None):
        """Constructor.

        Args:
          config: A dictionary of configuration information.
          nameservers: A list of nameserver objects to include in the report.
          results: A dictionary of results from Benchmark.Run()
          index: A dictionary of results for index hosts.
          geodata: A dictionary of geographic information.
          status_callback: where to send msg() calls.
        """
        self.nameservers = nameservers
        self.results = results
        self.index = index
        self.config = config
        self.geodata = geodata
        self.status_callback = status_callback
        # Memoized ComputeAverages() output, keyed by len(self.results).
        self.cached_averages = {}
        # Memoized _GenerateNameServerSummary() output.
        self.cached_summary = None

    def msg(self, msg, **kwargs):
        """Forward a status message to status_callback, if one was given."""
        if self.status_callback:
            self.status_callback(msg, **kwargs)

    def ComputeAverages(self):
        """Process all runs for all hosts, yielding an average for each host.

        Disabled nameservers are skipped. The result is cached, keyed by the
        number of nameservers present in self.results.

        Returns:
          A list with one tuple per enabled nameserver:
          (ns, overall_average, run_averages, fastest, slowest,
           failure_count, nx_count, total_count)
          where total_count is the number of requests in the last test run.
        """
        cache_key = len(self.results)
        if cache_key in self.cached_averages:
            return self.cached_averages[cache_key]

        records = []
        for ns in self.results:
            if ns.disabled:
                continue
            failure_count = 0
            nx_count = 0
            # Reset per nameserver so that a nameserver without any runs does
            # not inherit the count of the previous one (or hit an unbound
            # local for the very first one).
            total_count = 0
            run_averages = []

            for test_run in self.results[ns]:
                # x: record, req_type, duration, response
                total_count = len(test_run)
                failure_count += len([x for x in test_run if not x[3]])
                nx_count += len([x for x in test_run
                                 if x[3] and not x[3].answer])
                duration = sum([x[2] for x in test_run])
                run_averages.append(duration / len(test_run))

            # This appears to be a safe use of averaging averages
            overall_average = util.CalculateListAverage(run_averages)
            (fastest, slowest) = self.FastestAndSlowestDurationForNameServer(ns)

            records.append((ns, overall_average, run_averages, fastest,
                            slowest, failure_count, nx_count, total_count))

        self.cached_averages[cache_key] = records
        return records

    def FastestAndSlowestDurationForNameServer(self, ns):
        """For a given nameserver, find the fastest/slowest non-error durations.

        Args:
          ns: A nameserver object that is a key of self.results.

        Returns:
          A tuple of (fastest_duration, slowest_duration). Only requests that
          got a response with an answer are considered; if there are none,
          every recorded duration is considered instead.

        Raises:
          ValueError: if no durations at all were recorded for ns.
        """
        all_durations = []
        answered_durations = []
        for test_run_results in self.results[ns]:
            for (unused_host, unused_type, duration, response,
                 unused_error) in test_run_results:
                all_durations.append(duration)
                if response and response.answer:
                    answered_durations.append(duration)

        # If we have no error-free durations, settle for anything.
        candidates = answered_durations or all_durations
        return (min(candidates), max(candidates))

    def FastestNameServerResult(self):
        """Find the fastest duration recorded for each nameserver.

        Returns:
          A list of (ns, fastest_duration) tuples, fastest first.
        """
        # TODO(tstromberg): This should not count queries which failed.
        fastest = [(ns, self.FastestAndSlowestDurationForNameServer(ns)[0])
                   for ns in self.results]
        return sorted(fastest, key=operator.itemgetter(1))

    def BestOverallNameServer(self):
        """Return the best nameserver we found.

        Returns:
          The nameserver with the lowest overall average that is not failure
          prone, or the lowest overall average if all of them are.
        """
        sorted_averages = sorted(self.ComputeAverages(),
                                 key=operator.itemgetter(1))
        hosts = [x[0] for x in sorted_averages]
        for host in hosts:
            if not host.is_failure_prone:
                return host
        # return something if none of them are good.
        return hosts[0]

    def NearestNameServers(self, count=2):
        """Return the nameservers with the least latency.

        Args:
          count: Maximum number of nameservers to return.

        Returns:
          A list of up to count nameserver objects, fastest first.
        """
        # FastestNameServerResult() is already sorted by duration.
        return [x[0] for x in self.FastestNameServerResult()][0:count]

    def _LowestLatencyAsciiChart(self):
        """Return a simple set of tuples to generate an ASCII chart from.

        Returns:
          A list of (nameserver name, text bar, duration) tuples, where bars
          are scaled against the slowest of the per-nameserver fastest
          durations.
        """
        fastest = self.FastestNameServerResult()
        slowest_result = fastest[-1][1]
        chart = []
        for (ns, duration) in fastest:
            textbar = util.DrawTextBar(duration, slowest_result)
            chart.append((ns.name, textbar, duration))
        return chart

    def _MeanRequestAsciiChart(self):
        """Creates an ASCII Chart of Mean Response Time.

        Returns:
          A list of (nameserver name, text bar, overall mean) tuples, where
          bars are scaled against the largest overall mean.
        """
        sorted_averages = sorted(self.ComputeAverages(),
                                 key=operator.itemgetter(1))
        max_result = sorted_averages[-1][1]
        chart = []
        for result in sorted_averages:
            (ns, overall_mean) = result[0:2]
            textbar = util.DrawTextBar(overall_mean, max_result)
            chart.append((ns.name, textbar, overall_mean))
        return chart

    def CreateReport(self, format='ascii', output_fp=None, csv_path=None,
                     sharing_url=None, sharing_state=None):
        """Create a Report in a given format.

        Args:
          format: string (ascii, html, etc.) which defines what template to
            load.
          output_fp: A File object to send the output to (optional). It is
            closed after the report has been written to it.
          csv_path: A string pathname to the CSV output to link to (optional)
          sharing_url: A string URL where the results have been shared to.
            (optional)
          sharing_state: A string showing what the shared result state is
            (optional)

        Returns:
          A rendered template (string)
        """

        # First generate all of the charts necessary.
        if format == 'ascii':
            lowest_latency = self._LowestLatencyAsciiChart()
            mean_duration = self._MeanRequestAsciiChart()
        else:
            lowest_latency = None
            mean_duration = None

        sorted_averages = sorted(self.ComputeAverages(),
                                 key=operator.itemgetter(1))
        runs_data = [(x[0].name, x[2]) for x in sorted_averages]
        mean_duration_url = charts.PerRunDurationBarGraph(runs_data)
        min_duration_url = charts.MinimumDurationBarGraph(
            self.FastestNameServerResult())
        distribution_url_200 = charts.DistributionLineGraph(
            self.DigestedResults(), scale=200)
        distribution_url = charts.DistributionLineGraph(
            self.DigestedResults(), scale=self.config.timeout * 1000)

        # Now generate all of the required textual information.
        ns_summary = self._GenerateNameServerSummary()
        best_ns = self.BestOverallNameServer()
        # Recommend the top-placed server plus the two other servers with the
        # lowest minimum duration.
        recommended = [ns_summary[0]]
        for row in sorted(ns_summary, key=operator.itemgetter('duration_min')):
            if row['ip'] != ns_summary[0]['ip']:
                recommended.append(row)
                if len(recommended) == 3:
                    break

        compare_title = 'Undecided'
        compare_subtitle = 'Not enough servers to compare.'
        compare_reference = None
        for ns_record in ns_summary:
            if ns_record.get('is_reference'):
                if ns_record == ns_summary[0]:
                    # The reference server is already the top-placed one.
                    compare_reference = ns_record
                    compare_title = 'N/A'
                    compare_subtitle = ''
                elif len(ns_record['durations'][0]) >= MIN_RELEVANT_COUNT:
                    compare_reference = ns_record
                    compare_title = '%0.1f%%' % ns_summary[0]['diff']
                    compare_subtitle = 'Faster'
                else:
                    compare_subtitle = ('Too few tests (needs %s)'
                                        % (MIN_RELEVANT_COUNT))
                break

        # Fragile, makes assumption about the CSV being in the same path as
        # the HTML file
        if csv_path:
            csv_link = os.path.basename(csv_path)
        else:
            csv_link = None

        template_name = '%s.tmpl' % format
        template_path = util.FindDataFile(
            os.path.join('templates', template_name))
        filtered_config = self.FilteredConfig()
        template_dir = os.path.dirname(template_path)
        env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
        template = env.get_template(template_name)
        sys_nameservers = nameserver_list.InternalNameServers()
        if sys_nameservers:
            system_primary = sys_nameservers[0]
        else:
            system_primary = None

        rendered = template.render(
            best_ns=best_ns,
            system_primary=system_primary,
            timestamp=datetime.datetime.now(),
            lowest_latency=lowest_latency,
            version=self.config.version,
            compare_subtitle=compare_subtitle,
            compare_title=compare_title,
            compare_reference=compare_reference,
            sharing_url=sharing_url,
            sharing_state=sharing_state,
            config=filtered_config,
            mean_duration=mean_duration,
            ns_summary=ns_summary,
            mean_duration_url=mean_duration_url,
            min_duration_url=min_duration_url,
            distribution_url=distribution_url,
            distribution_url_200=distribution_url_200,
            recommended=recommended,
            csv_link=csv_link
        )
        if output_fp:
            output_fp.write(rendered)
            output_fp.close()
        # Always hand the rendered text back, as the docstring promises, even
        # when it has also been written to output_fp.
        return rendered

    def FilteredConfig(self):
        """Generate a watered down config listing for our report.

        Returns:
          A sorted list of (name, value) tuples for the public attributes of
          self.config that hold an int, float or str.
        """
        keys = [x for x in dir(self.config)
                if not x.startswith('_') and x not in ('config', 'site_url')]
        config_items = []
        for key in keys:
            value = getattr(self.config, key)
            # Only keep simple scalar values: other objects are ConfigParser
            # internals, and None values are just noise.
            if isinstance(value, (int, float, str)):
                config_items.append((key, value))
        return sorted(config_items)

    def DigestedResults(self):
        """Return a tuple of nameserver and all associated durations.

        Returns:
          A list of (ns, durations) tuples, where durations is a flat list of
          the durations from every test run of that nameserver.
        """
        duration_data = []
        for ns in self.results:
            durations = []
            for test_run_results in self.results[ns]:
                durations += [x[2] for x in test_run_results]
            duration_data.append((ns, durations))
        return duration_data

    def _GenerateNameServerSummary(self):
        """Build one summary dictionary per nameserver for the templates.

        Every nameserver in self.nameservers gets a record with its basic
        information. Nameservers that were benchmarked additionally get their
        scores, and an improvement percentage ('diff') relative to a
        reference nameserver. The result is cached.

        Returns:
          A list of dictionaries sorted by 'position'. Benchmarked nameservers
          are placed from 0 by overall average; the others are placed after
          1000, ordered by check_average.
        """
        if self.cached_summary:
            return self.cached_summary

        nsdata = {}
        sorted_averages = sorted(self.ComputeAverages(),
                                 key=operator.itemgetter(1))
        placed_at = -1
        fastest_nonglobal = None
        reference = None

        # Fill in basic information for all nameservers, even those without
        # scores.
        fake_position = 1000
        for ns in sorted(self.nameservers,
                         key=operator.attrgetter('check_average')):
            fake_position += 1

            nsdata[ns] = {
                'ip': ns.ip,
                'name': ns.name,
                'hostname': ns.hostname,
                'version': ns.version,
                'node_ids': list(ns.node_ids),
                'sys_position': ns.system_position,
                'is_failure_prone': ns.is_failure_prone,
                'duration_min': float(ns.fastest_check_duration),
                'is_global': ns.is_global,
                'is_regional': ns.is_regional,
                'is_custom': ns.is_custom,
                'is_reference': False,
                'is_disabled': bool(ns.disabled),
                'check_average': ns.check_average,
                'error_count': ns.error_count,
                'timeout_count': ns.timeout_count,
                'notes': url_map.CreateNoteUrlTuples(ns.notes),
                'port_behavior': ns.port_behavior,
                'position': fake_position
            }

        # Fill the scores in.
        for (ns, unused_avg, run_averages, fastest_duration, slowest_duration,
             unused_failures, nx_count, unused_total) in sorted_averages:
            placed_at += 1

            # One list of durations per test run.
            durations = [[x[2] for x in test_run]
                         for test_run in self.results[ns]]

            nsdata[ns].update({
                'position': placed_at,
                'overall_average': util.CalculateListAverage(run_averages),
                'averages': run_averages,
                'duration_min': float(fastest_duration),
                'duration_max': slowest_duration,
                'nx_count': nx_count,
                'durations': durations,
                'index': self._GenerateIndexSummary(ns),
            })
            # Determine which nameserver to refer to for improvement scoring
            if not ns.disabled:
                if ns.system_position == 0:
                    reference = ns
                elif fastest_nonglobal is None and not ns.is_global:
                    fastest_nonglobal = ns

        # If no reference was found, use the fastest non-global nameserver
        # record.
        if reference is None:
            if fastest_nonglobal is not None:
                reference = fastest_nonglobal
            elif len(sorted_averages) > 1:
                # The second ns.
                reference = sorted_averages[1][0]
            elif sorted_averages:
                # Only one nameserver was benchmarked: it is its own
                # reference.
                reference = sorted_averages[0][0]

        # Update the improvement scores for each nameserver. Without any
        # benchmarked nameserver there is nothing to compare against.
        if reference is not None:
            for ns in nsdata:
                if nsdata[ns]['ip'] != nsdata[reference]['ip']:
                    if 'overall_average' in nsdata[ns]:
                        nsdata[ns]['diff'] = (
                            (nsdata[reference]['overall_average'] /
                             nsdata[ns]['overall_average']) - 1) * 100
                else:
                    nsdata[ns]['is_reference'] = True

        self.cached_summary = sorted(nsdata.values(),
                                     key=operator.itemgetter('position'))
        return self.cached_summary

    def _GenerateIndexSummary(self, ns):
        """Summarize the index results recorded for a nameserver.

        Args:
          ns: A nameserver object.

        Returns:
          A list of (host, req_type, duration, answer_count, ttl,
          response text) tuples; empty if there is no index data for ns.
        """
        # Get the meat out of the index data.
        index = []
        # self.index defaults to None when no index data was supplied.
        if self.index and ns in self.index:
            for (host, req_type, duration, response,
                 unused_x) in self.index[ns]:
                answer_count, ttl = self._ResponseToCountTtlText(response)[0:2]
                index.append((host, req_type, duration, answer_count, ttl,
                              nameserver.ResponseToAscii(response)))
        return index

    def _GetPlatform(self):
        """Return a human-readable name for the platform we are running on."""
        my_platform = platform.system()
        if my_platform == 'Darwin':
            if (os.path.exists('/usr/sbin/sw_vers') or
                    os.path.exists('/usr/sbin/system_profiler')):
                my_platform = 'Mac OS X'
        if my_platform == 'Linux':
            # platform.dist() does not exist in newer Python releases; report
            # plain 'Linux' when it is unavailable.
            dist_function = getattr(platform, 'dist', None)
            distro = None
            if dist_function is not None:
                distro = dist_function()[0]
            if distro:
                my_platform = 'Linux (%s)' % distro
        return my_platform

    def _CreateSharingData(self):
        """Build the data structure that gets shared, with private data masked.

        Returns:
          A dictionary with 'config', 'nameservers' and 'geodata' keys.
        """
        config = dict(self.FilteredConfig())
        config['platform'] = self._GetPlatform()

        # Purge sensitive information (be aggressive!)
        purged_rows = []
        for row in self._GenerateNameServerSummary():
            # This will be our censored record.
            p = dict(row)
            p['notes'] = []
            for note in row['notes']:
                p['notes'].append({
                    'text': addr_util.MaskStringWithIPs(note['text']),
                    'url': note['url']
                })

            p['ip'], p['hostname'], p['name'] = addr_util.MaskPrivateHost(
                row['ip'], row['hostname'], row['name'])
            if (addr_util.IsPrivateIP(row['ip'])
                    or addr_util.IsLoopbackIP(row['ip'])
                    or addr_util.IsPrivateHostname(row['hostname'])):
                p['node_ids'] = []
                p['version'] = None
            purged_rows.append(p)

        return {'config': config, 'nameservers': purged_rows,
                'geodata': self.geodata}

    def CreateJsonData(self):
        """Return the sharing data serialized as a JSON string."""
        sharing_data = self._CreateSharingData()
        return simplejson.dumps(sharing_data)

    def _ResponseToCountTtlText(self, response):
        """For a given DNS response, parse the most important details out.

        Args:
          response: DNS response

        Returns:
          tuple of (answer_count, ttl, answer_text). answer_count and ttl are
          -1 and answer_text is '' for a missing response; answer_count and
          ttl stay -1 for a response without answers.
        """

        answer_text = ''
        answer_count = -1
        ttl = -1
        if response:
            if response.answer:
                answer_count = len(response.answer)
                ttl = response.answer[0].ttl
            answer_text = nameserver.ResponseToAscii(response)
        return (answer_count, ttl, answer_text)

    def SaveResultsToCsv(self, filename):
        """Write out a CSV file with detailed results on each request.

        Args:
          filename: full path on where to save results (string)

        Sample output:
          IP, Name, Test_Num, Record, Record_Type, Duration, TTL,
          Answer_Count, Response, Error
        """
        self.msg('Opening %s for write' % filename, debug=True)
        csv_file = open(filename, 'w')
        # Make sure the file gets closed even if a row fails to be written.
        try:
            output = csv.writer(csv_file)
            # One header per field written below, including the error message.
            output.writerow(['IP', 'Name', 'Test_Num', 'Record',
                             'Record_Type', 'Duration', 'TTL', 'Answer_Count',
                             'Response', 'Error'])
            for ns in self.results:
                self.msg('Saving detailed data for %s' % ns, debug=True)
                for (test_run, test_results) in enumerate(self.results[ns]):
                    for (record, req_type, duration, response,
                         error_msg) in test_results:
                        (answer_count, ttl, answer_text) = (
                            self._ResponseToCountTtlText(response))
                        output.writerow([ns.ip, ns.name, test_run, record,
                                         req_type, duration, ttl, answer_count,
                                         answer_text, error_msg])
        finally:
            csv_file.close()
        self.msg('%s saved.' % filename, debug=True)