1# Copyright 2003-2008 by Leighton Pritchard.  All rights reserved.
2# Revisions copyright 2008-2009 by Peter Cock.
3#
4# This file is part of the Biopython distribution and governed by your
5# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
6# Please see the LICENSE file that should have been included as part of this
7# package.
8#
9# Contact:       Leighton Pritchard, The James Hutton Institute,
10#                Invergowrie, Dundee, Scotland, DD2 5DA, UK
11#                Leighton.Pritchard@hutton.ac.uk
12################################################################################
13
14"""Graph module.
15
16Provides:
17 - GraphData - Contains data from which a graph will be drawn, and
18   information about its presentation
19
20For drawing capabilities, this module uses reportlab to draw and write
21the diagram: http://www.reportlab.com
22"""
23
24# ReportLab imports
25
26from reportlab.lib import colors
27
28from math import sqrt
29
30
class GraphData:
    """Graph Data.

    Holds the data points for one graph, keyed by sequence position,
    together with the attributes describing how it should be presented.

    Attributes:
     - id    Unique identifier for the data
     - data  Dictionary of data values, keyed by position
     - name  String describing the data
     - style String ('bar', 'heat', 'line') describing how to draw the data
     - poscolor     colors.Color for drawing high (some styles) or all
       values
     - negcolor     colors.Color for drawing low values (some styles)
     - linewidth     Int, thickness to draw the line in 'line' styles
     - center        Value at which x-axis crosses y-axis

    """

    def __init__(
        self,
        id=None,
        data=None,
        name=None,
        style="bar",
        color=colors.lightgreen,
        altcolor=colors.darkseagreen,
        center=None,
        colour=None,
        altcolour=None,
    ):
        """Initialize.

        Arguments:
         - id    Unique ID for the graph
         - data  List of (position, value) tuples
         - name  String describing the graph
         - style String describing the presentation style ('bar', 'line',
           'heat')
         - color   colors.Color describing the color to draw all or the
           'high' (some styles) values (overridden by backwards
           compatible argument with UK spelling, colour).
         - altcolor colors.Color describing the color to draw the 'low'
           values (some styles only) (overridden by backwards
           compatible argument with UK spelling, altcolour).
         - center Value at which x-axis crosses y-axis.
         - colour    Backwards compatible UK spelling of color; when not
           None it takes precedence over color.
         - altcolour Backwards compatible UK spelling of altcolor; when
           not None it takes precedence over altcolor.

        """
        # Let the UK spelling (colour) override the USA spelling (color)
        if colour is not None:
            color = colour
        if altcolour is not None:
            altcolor = altcolour

        self.id = id  # Unique identifier for the graph
        self.data = {}  # holds values, keyed by sequence position
        if data is not None:
            self.set_data(data)
        self.name = name  # Descriptive string

        # Attributes describing how the graph will be drawn
        self.style = style  # One of 'bar', 'heat' or 'line' (not validated)
        self.poscolor = color  # Color to draw all, or 'high' values
        self.negcolor = altcolor  # Color to draw 'low' values
        self.linewidth = 2  # linewidth to use in line graphs
        self.center = center  # value at which x-axis crosses y-axis

    def set_data(self, data):
        """Add data as a list of (position, value) tuples.

        Points are merged into the existing data; a value already stored
        at the same position is overwritten.
        """
        for pos, val in data:  # Fill data dictionary
            self.data[pos] = val

    def get_data(self):
        """Return data as a list of sorted (position, value) tuples."""
        return sorted(self.data.items())

    def add_point(self, point):
        """Add a single point to the set of data as a (position, value) tuple.

        A value already stored at the same position is overwritten.
        """
        pos, val = point
        self.data[pos] = val

    def quartiles(self):
        """Return (minimum, lowerQ, medianQ, upperQ, maximum) values as tuple.

        The quartiles are taken directly from the sorted values at indices
        len // 4, len // 2 and 3 * len // 4 (no interpolation).

        Raises IndexError if the graph holds no data.
        """
        data = sorted(self.data.values())
        datalen = len(data)
        return (
            data[0],
            data[datalen // 4],
            data[datalen // 2],
            data[3 * datalen // 4],
            data[-1],
        )

    def range(self):
        """Return range of data as (start, end) tuple.

        Returns the range of the data, i.e. its start and end points on
        the genome as a (start, end) tuple.

        Raises IndexError if the graph holds no data.
        """
        positions = sorted(self.data)  # i.e. dict keys
        # Return first and last positions in graph
        return (positions[0], positions[-1])

    def mean(self):
        """Return the mean value for the data points (float).

        Raises ZeroDivisionError if the graph holds no data.
        """
        values = list(self.data.values())
        # Accumulate sequentially (and avoid shadowing the builtin sum)
        total = 0.0
        for item in values:
            total += float(item)
        return total / len(values)

    def stdev(self):
        """Return the sample standard deviation for the data (float).

        Raises ZeroDivisionError if the graph holds fewer than two data
        points, as the sample standard deviation is then undefined.
        """
        values = list(self.data.values())
        m = self.mean()
        runtotal = 0.0
        for entry in values:
            runtotal += float((entry - m) ** 2)
        # This is sample standard deviation; population stdev would involve
        # division by len(data), rather than len(data)-1
        return sqrt(runtotal / (len(values) - 1))

    def __len__(self):
        """Return the number of points in the data set."""
        return len(self.data)

    def __getitem__(self, index):
        """Return data value(s) at the given position.

        Given an integer representing position on the sequence
        returns the data value stored at the passed position (raising
        KeyError if there is no data point at that position).

        If a slice, returns graph data from the region as a list of
        (position, value) tuples, sorted by position. Both end points
        of the slice are treated as inclusive, and an omitted start or
        stop leaves that side of the region unbounded. Slices with step
        are not supported and raise ValueError.

        Any other type of index raises TypeError.
        """
        if isinstance(index, int):
            return self.data[index]
        elif isinstance(index, slice):
            if index.step is not None and index.step != 1:
                raise ValueError("Slices with step are not supported")
            # NOTE - unlike normal Python slicing both end points are
            # inclusive; this is kept for backwards compatibility.
            low = index.start
            high = index.stop
            # A bound of None (e.g. graph[5:] or graph[:10]) means open
            # ended; comparing a position against None would raise a
            # TypeError under Python 3.
            return [
                (pos, self.data[pos])
                for pos in sorted(self.data)
                if (low is None or pos >= low) and (high is None or pos <= high)
            ]
        else:
            raise TypeError("Need an integer or a slice")

    def __str__(self):
        """Return a string describing the graph data.

        Statistics which are undefined for the current data are left
        out rather than raising an exception: a graph with no points
        reports only its name, ID and size, and a graph with a single
        point omits the sample standard deviation.
        """
        npoints = len(self.data)
        outstr = ["\nGraphData: %s, ID: %s" % (self.name, self.id)]
        outstr.append("Number of points: %d" % npoints)
        if npoints:
            outstr.append("Mean data value: %s" % self.mean())
            if npoints > 1:  # sample SD needs at least two points
                outstr.append("Sample SD: %.3f" % self.stdev())
            outstr.append(
                "Minimum: %s\n1Q: %s\n2Q: %s\n3Q: %s\nMaximum: %s" % self.quartiles()
            )
            outstr.append("Sequence Range: %s..%s" % self.range())
        return "\n".join(outstr)
196