# Copyright 2009 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15"""Report generation class."""
16
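# A rough usage sketch (hypothetical caller; the config, nameserver list, and
# benchmark results objects are assumed to come from the rest of namebench):
#
#   reporter = ReportGenerator(config, nameservers, results, index=index)
#   print reporter.CreateReport(format='ascii')
#   reporter.SaveResultsToCsv('/tmp/namebench.csv')
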
import csv
import datetime
import operator
import os.path
import platform

# external dependencies (from third_party)
import jinja2
import simplejson

import addr_util
import charts
import nameserver
import nameserver_list
import url_map
import util

# Only bother showing a percentage if we have this many tests.
MIN_RELEVANT_COUNT = 50


class ReportGenerator(object):
  """Generate reports - ASCII, HTML, etc."""

  def __init__(self, config, nameservers, results, index=None, geodata=None,
               status_callback=None):
    """Constructor.

    Args:
      config: A dictionary of configuration information.
      nameservers: A list of nameserver objects to include in the report.
      results: A dictionary of results from Benchmark.Run()
      index: A dictionary of results for index hosts.
      geodata: A dictionary of geographic information.
      status_callback: where to send msg() calls.
    """
    self.nameservers = nameservers
    self.results = results
    self.index = index
    self.config = config
    self.geodata = geodata
    self.status_callback = status_callback
    self.cached_averages = {}
    self.cached_summary = None

  def msg(self, msg, **kwargs):
    if self.status_callback:
      self.status_callback(msg, **kwargs)

  def ComputeAverages(self):
    """Process all runs for all hosts, yielding an average for each host."""
    if len(self.results) in self.cached_averages:
      return self.cached_averages[len(self.results)]

    records = []
    for ns in self.results:
      if ns.disabled:
        continue
      failure_count = 0
      nx_count = 0
      total_count = 0
      run_averages = []

      for test_run in self.results[ns]:
        # x: (record, req_type, duration, response, error_msg)
        total_count += len(test_run)
        failure_count += len([x for x in test_run if not x[3]])
        nx_count += len([x for x in test_run if x[3] and not x[3].answer])
        duration = sum([x[2] for x in test_run])
        run_averages.append(duration / len(test_run))

      # This appears to be a safe use of averaging averages
      overall_average = util.CalculateListAverage(run_averages)
      (fastest, slowest) = self.FastestAndSlowestDurationForNameServer(ns)

      records.append((ns, overall_average, run_averages, fastest, slowest,
                      failure_count, nx_count, total_count))
    self.cached_averages[len(self.results)] = records
    return self.cached_averages[len(self.results)]

  def FastestAndSlowestDurationForNameServer(self, ns):
    """For a given nameserver, find the fastest/slowest non-error durations."""

    fastest_duration = 2**32
    slowest_duration = -1

    durations = []
    for test_run_results in self.results[ns]:
      for (unused_host, unused_type, duration, response, unused_error) in test_run_results:
        durations.append(duration)
        if response and response.answer:
          if duration < fastest_duration:
            fastest_duration = duration
        if duration > slowest_duration:
          slowest_duration = duration

    # If we have no error-free durations, settle for anything.
    if fastest_duration == 2**32:
      fastest_duration = min(durations)
    if slowest_duration == -1:
      slowest_duration = max(durations)
    return (fastest_duration, slowest_duration)

  def FastestNameServerResult(self):
    """Return (nameserver, fastest duration) tuples, sorted by duration."""
    # TODO(tstromberg): This should not count queries which failed.
    fastest = [(ns, self.FastestAndSlowestDurationForNameServer(ns)[0]) for ns in self.results]
    return sorted(fastest, key=operator.itemgetter(1))

  def BestOverallNameServer(self):
    """Return the best nameserver we found."""

    sorted_averages = sorted(self.ComputeAverages(), key=operator.itemgetter(1))
    hosts = [x[0] for x in sorted_averages]
    for host in hosts:
      if not host.is_failure_prone:
        return host
    # return something if none of them are good.
    return hosts[0]

  def NearestNameServers(self, count=2):
    """Return the nameservers with the least latency."""
    min_responses = sorted(self.FastestNameServerResult(),
                           key=operator.itemgetter(1))
    return [x[0] for x in min_responses][0:count]

  def _LowestLatencyAsciiChart(self):
    """Return a simple set of tuples to generate an ASCII chart from."""
    fastest = self.FastestNameServerResult()
    slowest_result = fastest[-1][1]
    chart = []
    for (ns, duration) in fastest:
      textbar = util.DrawTextBar(duration, slowest_result)
      chart.append((ns.name, textbar, duration))
    return chart

  def _MeanRequestAsciiChart(self):
    """Creates an ASCII Chart of Mean Response Time."""
    sorted_averages = sorted(self.ComputeAverages(), key=operator.itemgetter(1))
    max_result = sorted_averages[-1][1]
    chart = []
    for result in sorted_averages:
      (ns, overall_mean) = result[0:2]
      textbar = util.DrawTextBar(overall_mean, max_result)
      chart.append((ns.name, textbar, overall_mean))
    return chart

  def CreateReport(self, format='ascii', output_fp=None, csv_path=None,
                   sharing_url=None, sharing_state=None):
    """Create a report in a given format.

    Args:
      format: string (ascii, html, etc.) which defines what template to load.
      output_fp: A file object to write the output to (optional)
      csv_path: A string path to the CSV output to link to (optional)
      sharing_url: A string URL where the results were shared (optional)
      sharing_state: A string describing the shared result state (optional)

    Returns:
      A rendered template (string), or None if output_fp was given.
    """

    # First generate all of the charts necessary.
    if format == 'ascii':
      lowest_latency = self._LowestLatencyAsciiChart()
      mean_duration = self._MeanRequestAsciiChart()
    else:
      lowest_latency = None
      mean_duration = None

    sorted_averages = sorted(self.ComputeAverages(), key=operator.itemgetter(1))
    runs_data = [(x[0].name, x[2]) for x in sorted_averages]
    mean_duration_url = charts.PerRunDurationBarGraph(runs_data)
    min_duration_url = charts.MinimumDurationBarGraph(self.FastestNameServerResult())
    distribution_url_200 = charts.DistributionLineGraph(self.DigestedResults(),
                                                        scale=200)
    distribution_url = charts.DistributionLineGraph(self.DigestedResults(),
                                                    scale=self.config.timeout * 1000)

    # Now generate all of the required textual information.
    ns_summary = self._GenerateNameServerSummary()
    best_ns = self.BestOverallNameServer()
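    # Recommend the top-ranked server first, then add up to two of the
    # lowest-latency alternatives (smallest duration_min).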
    recommended = [ns_summary[0]]
    for row in sorted(ns_summary, key=operator.itemgetter('duration_min')):
      if row['ip'] != ns_summary[0]['ip']:
        recommended.append(row)
      if len(recommended) == 3:
        break

    compare_title = 'Undecided'
    compare_subtitle = 'Not enough servers to compare.'
    compare_reference = None
    for ns_record in ns_summary:
      if ns_record.get('is_reference'):
        if ns_record == ns_summary[0]:
          compare_reference = ns_record
          compare_title = 'N/A'
          compare_subtitle = ''
        elif len(ns_record['durations'][0]) >= MIN_RELEVANT_COUNT:
          compare_reference = ns_record
          compare_title = '%0.1f%%' % ns_summary[0]['diff']
          compare_subtitle = 'Faster'
        else:
          compare_subtitle = 'Too few tests (needs %s)' % (MIN_RELEVANT_COUNT)
        break

    # Fragile: assumes the CSV file lives in the same directory as the HTML report.
    if csv_path:
      csv_link = os.path.basename(csv_path)
    else:
      csv_link = None
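    # Templates are resolved as templates/<format>.tmpl via util.FindDataFile.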
    template_name = '%s.tmpl' % format
    template_path = util.FindDataFile(os.path.join('templates', template_name))
    filtered_config = self.FilteredConfig()
    template_dir = os.path.dirname(template_path)
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
    template = env.get_template(template_name)
    sys_nameservers = nameserver_list.InternalNameServers()
    if sys_nameservers:
      system_primary = sys_nameservers[0]
    else:
      system_primary = None

    rendered = template.render(
        best_ns=best_ns,
        system_primary=system_primary,
        timestamp=datetime.datetime.now(),
        lowest_latency=lowest_latency,
        version=self.config.version,
        compare_subtitle=compare_subtitle,
        compare_title=compare_title,
        compare_reference=compare_reference,
        sharing_url=sharing_url,
        sharing_state=sharing_state,
        config=filtered_config,
        mean_duration=mean_duration,
        ns_summary=ns_summary,
        mean_duration_url=mean_duration_url,
        min_duration_url=min_duration_url,
        distribution_url=distribution_url,
        distribution_url_200=distribution_url_200,
        recommended=recommended,
        csv_link=csv_link
    )
    if output_fp:
      output_fp.write(rendered)
      output_fp.close()
    else:
      return rendered

  def FilteredConfig(self):
    """Generate a watered-down config listing for our report."""
    keys = [x for x in dir(self.config) if not x.startswith('_') and x not in ('config', 'site_url')]
    config_items = []
    for key in keys:
      value = getattr(self.config, key)
      # Skip object values (ConfigParser internals) and None values; they are just noise.
      if isinstance(value, (int, float, str)):
        config_items.append((key, value))
    return sorted(config_items)

  def DigestedResults(self):
    """Return a list of (nameserver, durations) tuples for all requests."""
    duration_data = []
    for ns in self.results:
      durations = []
      for test_run_results in self.results[ns]:
        durations += [x[2] for x in test_run_results]
      duration_data.append((ns, durations))
    return duration_data

  def _GenerateNameServerSummary(self):
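    """Return a list of per-nameserver summary dicts, sorted by position."""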
    if self.cached_summary:
      return self.cached_summary

    nsdata = {}
    sorted_averages = sorted(self.ComputeAverages(), key=operator.itemgetter(1))
    placed_at = -1
    fastest = {}
    fastest_nonglobal = {}
    reference = {}

    # Fill in basic information for all nameservers, even those without scores.
    fake_position = 1000
    for ns in sorted(self.nameservers, key=operator.attrgetter('check_average')):
      fake_position += 1

      nsdata[ns] = {
          'ip': ns.ip,
          'name': ns.name,
          'hostname': ns.hostname,
          'version': ns.version,
          'node_ids': list(ns.node_ids),
          'sys_position': ns.system_position,
          'is_failure_prone': ns.is_failure_prone,
          'duration_min': float(ns.fastest_check_duration),
          'is_global': ns.is_global,
          'is_regional': ns.is_regional,
          'is_custom': ns.is_custom,
          'is_reference': False,
          'is_disabled': bool(ns.disabled),
          'check_average': ns.check_average,
          'error_count': ns.error_count,
          'timeout_count': ns.timeout_count,
          'notes': url_map.CreateNoteUrlTuples(ns.notes),
          'port_behavior': ns.port_behavior,
          'position': fake_position
      }

    # Fill the scores in.
    for (ns, unused_avg, run_averages, fastest, slowest, unused_failures, nx_count, unused_total) in sorted_averages:
      placed_at += 1

      # Collect the list of durations for each test run.
      durations = []
      for test_run in self.results[ns]:
        durations.append([x[2] for x in test_run])

      nsdata[ns].update({
          'position': placed_at,
          'overall_average': util.CalculateListAverage(run_averages),
          'averages': run_averages,
          'duration_min': float(fastest),
          'duration_max': slowest,
          'nx_count': nx_count,
          'durations': durations,
          'index': self._GenerateIndexSummary(ns),
      })
      # Determine which nameserver to refer to for improvement scoring
      if not ns.disabled:
        if ns.system_position == 0:
          reference = ns
        elif not fastest_nonglobal and not ns.is_global:
          fastest_nonglobal = ns

    # If no reference was found, use the fastest non-global nameserver record.
    if not reference:
      if fastest_nonglobal:
        reference = fastest_nonglobal
      else:
        # Fall back to the second-fastest nameserver.
        reference = sorted_averages[1][0]

    # Update the improvement scores for each nameserver.
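    # diff is expressed relative to the reference server: for example, a
    # reference mean of 50ms versus a 25ms mean here works out to
    # (50 / 25 - 1) * 100 = +100% (i.e. twice as fast).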
    for ns in nsdata:
      if nsdata[ns]['ip'] != nsdata[reference]['ip']:
        if 'overall_average' in nsdata[ns]:
          nsdata[ns]['diff'] = ((nsdata[reference]['overall_average'] /
                                 nsdata[ns]['overall_average']) - 1) * 100
      else:
        nsdata[ns]['is_reference'] = True

#      print "--- DEBUG: %s ---" % ns
#      print nsdata[ns]
#      if 'index' in nsdata[ns]:
#        print "index length: %s" % len(nsdata[ns]['index'])
#      print ""

    self.cached_summary = sorted(nsdata.values(), key=operator.itemgetter('position'))
    return self.cached_summary

  def _GenerateIndexSummary(self, ns):
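    """Summarize the index results for a single nameserver."""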
    # Get the meat out of the index data.
    index = []
    if ns in self.index:
      for host, req_type, duration, response, unused_x in self.index[ns]:
        answer_count, ttl = self._ResponseToCountTtlText(response)[0:2]
        index.append((host, req_type, duration, answer_count, ttl,
                      nameserver.ResponseToAscii(response)))
    return index

  def _GetPlatform(self):
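    """Return a human-readable platform name, e.g. 'Mac OS X'."""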
    my_platform = platform.system()
    if my_platform == 'Darwin':
      if os.path.exists('/usr/sbin/sw_vers') or os.path.exists('/usr/sbin/system_profiler'):
        my_platform = 'Mac OS X'
    if my_platform == 'Linux':
      distro = platform.dist()[0]
      if distro:
        my_platform = 'Linux (%s)' % distro
    return my_platform

  def _CreateSharingData(self):
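    """Return config, masked nameserver summaries, and geodata for sharing."""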
    config = dict(self.FilteredConfig())
    config['platform'] = self._GetPlatform()

    # Purge sensitive information (be aggressive!)
    purged_rows = []
    for row in self._GenerateNameServerSummary():
      # This will be our censored record.
      p = dict(row)
      p['notes'] = []
      for note in row['notes']:
        p['notes'].append({'text': addr_util.MaskStringWithIPs(note['text']), 'url': note['url']})

      p['ip'], p['hostname'], p['name'] = addr_util.MaskPrivateHost(row['ip'], row['hostname'], row['name'])
      if (addr_util.IsPrivateIP(row['ip']) or addr_util.IsLoopbackIP(row['ip'])
          or addr_util.IsPrivateHostname(row['hostname'])):
        p['node_ids'] = []
        p['version'] = None
      purged_rows.append(p)

    return {'config': config, 'nameservers': purged_rows, 'geodata': self.geodata}

  def CreateJsonData(self):
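    """Return the sharing data serialized as a JSON string."""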
    sharing_data = self._CreateSharingData()
    return simplejson.dumps(sharing_data)

  def _ResponseToCountTtlText(self, response):
    """For a given DNS response, parse the most important details out.

    Args:
      response: DNS response

    Returns:
      tuple of (answer_count, ttl, answer_text)
    """

    answer_text = ''
    answer_count = -1
    ttl = -1
    if response:
      if response.answer:
        answer_count = len(response.answer)
        ttl = response.answer[0].ttl
      answer_text = nameserver.ResponseToAscii(response)
    return (answer_count, ttl, answer_text)

  def SaveResultsToCsv(self, filename):
    """Write out a CSV file with detailed results on each request.

    Args:
      filename: full path on where to save results (string)

    Sample output:
    IP, Name, Test_Num, Record, Record_Type, Duration, TTL, Answer_Count,
    Response, Error
    """
    self.msg('Opening %s for write' % filename, debug=True)
    csv_file = open(filename, 'w')
    output = csv.writer(csv_file)
    output.writerow(['IP', 'Name', 'Test_Num', 'Record',
                     'Record_Type', 'Duration', 'TTL', 'Answer_Count',
                     'Response', 'Error'])
    for ns in self.results:
      self.msg('Saving detailed data for %s' % ns, debug=True)
      for (test_run, test_results) in enumerate(self.results[ns]):
        for (record, req_type, duration, response, error_msg) in test_results:
          (answer_count, ttl, answer_text) = self._ResponseToCountTtlText(response)
          output.writerow([ns.ip, ns.name, test_run, record, req_type, duration,
                           ttl, answer_count, answer_text, error_msg])
    csv_file.close()
    self.msg('%s saved.' % filename, debug=True)
