# Copyright 2003-2008 by Leighton Pritchard.  All rights reserved.
# Revisions copyright 2008-2009 by Peter Cock.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
#
# Contact:       Leighton Pritchard, The James Hutton Institute,
#                Invergowrie, Dundee, Scotland, DD2 5DA, UK
#                Leighton.Pritchard@hutton.ac.uk
################################################################################

"""Graph module.

Provides:
 - GraphData - Contains data from which a graph will be drawn, and
   information about its presentation

For drawing capabilities, this module uses reportlab to draw and write
the diagram: http://www.reportlab.com
"""

from math import sqrt

# ReportLab imports
from reportlab.lib import colors


class GraphData:
    """Graph Data.

    Attributes:
     - id    Unique identifier for the data
     - data  Dictionary describing the data, keyed by position
     - name  String describing the data
     - style String ('bar', 'heat', 'line') describing how to draw the data
     - poscolor     colors.Color for drawing high (some styles) or all
       values
     - negcolor     colors.Color for drawing low values (some styles)
     - linewidth     Int, thickness to draw the line in 'line' styles
     - center        Value at which x-axis crosses y-axis

    """

    def __init__(
        self,
        id=None,
        data=None,
        name=None,
        style="bar",
        color=colors.lightgreen,
        altcolor=colors.darkseagreen,
        center=None,
        colour=None,
        altcolour=None,
    ):
        """Initialize.

        Arguments:
         - id    Unique ID for the graph
         - data  List of (position, value) tuples
         - name  String describing the graph
         - style String describing the presentation style ('bar', 'line',
           'heat')
         - color   colors.Color describing the color to draw all or the
           'high' (some styles) values (overridden by backwards
           compatible argument with UK spelling, colour).
         - altcolor colors.Color describing the color to draw the 'low'
           values (some styles only) (overridden by backwards
           compatible argument with UK spelling, altcolour).
         - center Value at which x-axis crosses y-axis.
         - colour    Backwards compatible UK spelling of color; takes
           precedence over color when not None.
         - altcolour Backwards compatible UK spelling of altcolor; takes
           precedence over altcolor when not None.

        """
        # Let the UK spelling (colour) override the USA spelling (color)
        if colour is not None:
            color = colour
        if altcolour is not None:
            altcolor = altcolour

        self.id = id  # Unique identifier for the graph
        self.data = {}  # holds values, keyed by sequence position
        if data is not None:
            self.set_data(data)
        self.name = name  # Descriptive string

        # Attributes describing how the graph will be drawn
        self.style = style  # One of 'bar', 'heat' or 'line'
        self.poscolor = color  # Color to draw all, or 'high' values
        self.negcolor = altcolor  # Color to draw 'low' values
        self.linewidth = 2  # linewidth to use in line graphs
        self.center = center  # value at which x-axis crosses y-axis

    def set_data(self, data):
        """Add data as a list of (position, value) tuples.

        Existing points are kept; a position that is already present has
        its value overwritten.
        """
        for pos, val in data:  # Fill data dictionary
            self.data[pos] = val

    def get_data(self):
        """Return data as a list of sorted (position, value) tuples."""
        # Positions are unique dict keys, so sorting the items orders the
        # tuples by position alone.
        return sorted(self.data.items())

    def add_point(self, point):
        """Add a single point to the set of data as a (position, value) tuple."""
        pos, val = point
        self.data[pos] = val

    def quartiles(self):
        """Return (minimum, lowerQ, medianQ, upperQ, maximum) values as tuple.

        The quartiles are read directly from the sorted values by index
        (no interpolation). Raises IndexError if there is no data.
        """
        values = sorted(self.data.values())
        npoints = len(values)
        return (
            values[0],
            values[npoints // 4],
            values[npoints // 2],
            values[3 * npoints // 4],
            values[-1],
        )

    def range(self):
        """Return range of data as (start, end) tuple.

        Returns the range of the data, i.e. its start and end points on
        the genome as a (start, end) tuple. Raises IndexError if there is
        no data.
        """
        positions = sorted(self.data)  # i.e. dict keys
        # Return first and last positions in graph
        return (positions[0], positions[-1])

    def mean(self):
        """Return the mean value for the data points (float).

        Raises ZeroDivisionError if there is no data.
        """
        values = list(self.data.values())
        # Accumulate explicitly (rather than with the builtin sum) so the
        # floating point result does not depend on the interpreter's
        # summation algorithm.
        total = 0.0
        for item in values:
            total += float(item)
        return total / len(values)

    def stdev(self):
        """Return the sample standard deviation for the data (float).

        Raises ZeroDivisionError if there are fewer than two data points,
        as the sample standard deviation is then undefined.
        """
        values = list(self.data.values())
        m = self.mean()
        runtotal = 0.0
        for entry in values:
            runtotal += float((entry - m) ** 2)
        # This is sample standard deviation; population stdev would involve
        # division by len(data), rather than len(data)-1
        return sqrt(runtotal / (len(values) - 1))

    def __len__(self):
        """Return the number of points in the data set."""
        return len(self.data)

    def __getitem__(self, index):
        """Return data value(s) at the given position.

        Given an integer representing position on the sequence
        returns a float - the data value at the passed position.
        Raises KeyError if there is no data at that position.

        If a slice, returns graph data from the region as a list of
        (position, value) tuples, sorted by position. Both end points
        are treated as inclusive, and an omitted start or stop leaves
        that side of the region unbounded. Slices with step are not
        supported (ValueError).

        Any other index type raises TypeError.
        """
        if isinstance(index, int):
            return self.data[index]
        elif isinstance(index, slice):
            # TODO - Why does it treat the end points both as inclusive?
            # This doesn't match Python norms does it?
            low = index.start
            high = index.stop
            if index.step is not None and index.step != 1:
                raise ValueError("Slices with a step other than 1 are not supported")
            # A missing bound is None, which cannot be ordered against a
            # position on Python 3, so treat it as "no limit" explicitly.
            return [
                (pos, self.data[pos])
                for pos in sorted(self.data)
                if (low is None or pos >= low) and (high is None or pos <= high)
            ]
        else:
            raise TypeError("Need an integer or a slice")

    def __str__(self):
        """Return a string describing the graph data.

        Statistics that cannot be computed are omitted rather than
        raising: an empty data set reports only the header and the number
        of points, and a single point omits the sample standard deviation.
        """
        outstr = ["\nGraphData: %s, ID: %s" % (self.name, self.id)]
        npoints = len(self.data)
        outstr.append("Number of points: %d" % npoints)
        if npoints:
            outstr.append("Mean data value: %s" % self.mean())
            if npoints > 1:
                # Sample SD divides by (n - 1), so it needs two points
                outstr.append("Sample SD: %.3f" % self.stdev())
            outstr.append(
                "Minimum: %s\n1Q: %s\n2Q: %s\n3Q: %s\nMaximum: %s" % self.quartiles()
            )
            outstr.append("Sequence Range: %s..%s" % self.range())
        return "\n".join(outstr)