1# Copyright 2012 by Wibowo Arindrarto.  All rights reserved.
2# This file is part of the Biopython distribution and governed by your
3# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
4# Please see the LICENSE file that should have been included as part of this
5# package.
6"""Bio.SearchIO object to model a single database hit."""
7
8
9from itertools import chain
10
11from Bio.SearchIO._utils import allitems, optionalcascade, getattr_str
12
13from ._base import _BaseSearchObject
14from .hsp import HSP
15
16
17class Hit(_BaseSearchObject):
18    """Class representing a single database hit of a search result.
19
    Hit objects are the second-level container in the SearchIO module. They
    are the objects contained within a QueryResult (see QueryResult). They
    themselves are containers for HSP objects and will contain at least one
    HSP.
24
25    To have a quick look at a Hit and its contents, invoke ``print`` on it::
26
27        >>> from Bio import SearchIO
28        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
29        >>> hit = qresult[3]
30        >>> print(hit)
31        Query: 33211
32               mir_1
33          Hit: gi|301171322|ref|NR_035857.1| (86)
34               Pan troglodytes microRNA mir-520c (MIR520C), microRNA
35         HSPs: ----  --------  ---------  ------  ---------------  ---------------------
36                  #   E-value  Bit score    Span      Query range              Hit range
37               ----  --------  ---------  ------  ---------------  ---------------------
38                  0   8.9e-20     100.47      60           [1:61]                [13:73]
39                  1   3.3e-06      55.39      60           [0:60]                [13:73]
40
41    You can invoke ``len`` on a Hit object to see how many HSP objects it contains::
42
43        >>> len(hit)
44        2
45
    Hit objects behave very similarly to Python lists. You can retrieve an HSP
    object inside a Hit using the HSP's integer index. Hit objects can also be
    sliced, which will return a new Hit object containing only the sliced HSPs::
49
50        # HSP items inside the Hit can be retrieved using its integer index
51        >>> hit[0]
52        HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)
53
54        # slicing returns a new Hit
55        >>> hit
56        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
57        >>> hit[:1]
58        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
59        >>> print(hit[1:])
60        Query: 33211
61               mir_1
62          Hit: gi|301171322|ref|NR_035857.1| (86)
63               Pan troglodytes microRNA mir-520c (MIR520C), microRNA
64         HSPs: ----  --------  ---------  ------  ---------------  ---------------------
65                  #   E-value  Bit score    Span      Query range              Hit range
66               ----  --------  ---------  ------  ---------------  ---------------------
67                  0   3.3e-06      55.39      60           [0:60]                [13:73]
68
69    Hit objects provide ``filter`` and ``map`` methods, which are analogous to
70    Python's built-in ``filter`` and ``map`` except that they return a new Hit
71    object instead of a list.
72
73    Here is an example of using ``filter`` to select for HSPs whose e-value is
74    less than 1e-10::
75
76        >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
77        >>> filtered_hit = hit.filter(evalue_filter)
78        >>> len(hit)
79        2
80        >>> len(filtered_hit)
81        1
82        >>> print(filtered_hit)
83        Query: 33211
84               mir_1
85          Hit: gi|301171322|ref|NR_035857.1| (86)
86               Pan troglodytes microRNA mir-520c (MIR520C), microRNA
87         HSPs: ----  --------  ---------  ------  ---------------  ---------------------
88                  #   E-value  Bit score    Span      Query range              Hit range
89               ----  --------  ---------  ------  ---------------  ---------------------
90                  0   8.9e-20     100.47      60           [1:61]                [13:73]
91
92    There are also other methods which are counterparts of Python lists' methods
93    with the same names: ``append``, ``index``, ``pop``, and ``sort``. Consult their
94    respective documentations for more details and examples of their usage.
95
96    """
97
    # Names of instance attributes that must NOT be carried over to a new Hit
    # object derived from this one (the results of slicing, ``filter``, ``map``
    # and out-of-place ``sort``). Those methods build the new Hit from its own
    # list of HSPs and then call ``_transfer_attrs``.
    # NOTE(review): presumably ``_transfer_attrs`` (defined outside this file)
    # is what reads this tuple -- confirm against the base class.
    _NON_STICKY_ATTRS = ("_items",)
101
102    def __init__(self, hsps=(), id=None, query_id=None):
103        """Initialize a Hit object.
104
105        :param hsps: HSP objects contained in the Hit object
106        :type hsps: iterable yielding HSP
107        :param id: hit ID
108        :type id: string
109        :param query_id: query ID
110        :type query_id: string
111
112        If multiple HSP objects are used for initialization, they must all
113        have the same ``query_id``, ``query_description``, ``hit_id``, and
114        ``hit_description`` properties.
115        """
116        # default attribute values
117        self._id = id
118        self._id_alt = []
119        self._query_id = query_id
120        self._description = None
121        self._description_alt = []
122        self._query_description = None
123        self.attributes = {}
124        self.dbxrefs = []
125
126        # TODO - Move this into the for look below in case
127        # hsps is a single use iterator?
128        for attr in ("query_id", "query_description", "hit_id", "hit_description"):
129            # HACK: setting the if clause to '> 1' allows for empty hit objects.
130            # This makes it easier to work with file formats with unpredictable
131            # hit-hsp ordering. The empty hit object itself is nonfunctional,
132            # however, since all its cascading properties are empty.
133            if len({getattr(hsp, attr) for hsp in hsps}) > 1:
134                raise ValueError(
135                    "Hit object can not contain HSPs with more than one %s." % attr
136                )
137
138        self._items = []
139        for hsp in hsps:
140            # validate each HSP
141            self._validate_hsp(hsp)
142            # and store it them as an instance attribute
143            self.append(hsp)
144
145    def __repr__(self):
146        """Return string representation of Hit object."""
147        return "Hit(id=%r, query_id=%r, %r hsps)" % (self.id, self.query_id, len(self))
148
149    def __iter__(self):
150        """Iterate over hsps."""
151        return iter(self.hsps)
152
153    def __len__(self):
154        """Return number of hsps."""
155        return len(self.hsps)
156
157    def __bool__(self):
158        """Return True if there are hsps."""
159        return bool(self.hsps)
160
161    def __contains__(self, hsp):
162        """Return True if hsp in items."""
163        return hsp in self._items
164
165    def __str__(self):
166        """Return a human readable summary of the Hit object."""
167        lines = []
168
169        # set query id line
170        qid_line = "Query: %s" % self.query_id
171        lines.append(qid_line)
172        if self.query_description:
173            line = "       %s" % self.query_description
174            line = line[:77] + "..." if len(line) > 80 else line
175            lines.append(line)
176
177        # set hit id line
178        hid_line = "  Hit: %s" % self.id
179        try:
180            seq_len = self.seq_len
181        except AttributeError:
182            pass
183        else:
184            hid_line += " (%i)" % seq_len
185        lines.append(hid_line)
186        if self.description:
187            line = "       %s" % self.description
188            line = line[:77] + "..." if len(line) > 80 else line
189            lines.append(line)
190
191        # set attributes lines
192        for key, value in sorted(self.attributes.items()):
193            lines.append(" %s: %s" % (key, value))
194
195        # set dbxrefs line
196        if self.dbxrefs:
197            lines.append("Database cross-references: " + ", ".join(self.dbxrefs))
198
199        # set hsp line and table
200        if not self.hsps:
201            lines.append(" HSPs: ?")
202        else:
203            lines.append(
204                " HSPs: %s  %s  %s  %s  %s  %s"
205                % ("-" * 4, "-" * 8, "-" * 9, "-" * 6, "-" * 15, "-" * 21)
206            )
207            pattern = "%11s  %8s  %9s  %6s  %15s  %21s"
208            lines.append(
209                pattern
210                % ("#", "E-value", "Bit score", "Span", "Query range", "Hit range")
211            )
212            lines.append(
213                pattern % ("-" * 4, "-" * 8, "-" * 9, "-" * 6, "-" * 15, "-" * 21)
214            )
215            for idx, hsp in enumerate(self.hsps):
216                # evalue
217                evalue = getattr_str(hsp, "evalue", fmt="%.2g")
218                # bitscore
219                bitscore = getattr_str(hsp, "bitscore", fmt="%.2f")
220                # alignment length
221                aln_span = getattr_str(hsp, "aln_span")
222                # query region
223                query_start = getattr_str(hsp, "query_start")
224                query_end = getattr_str(hsp, "query_end")
225                query_range = "[%s:%s]" % (query_start, query_end)
226                # max column length is 18
227                query_range = (
228                    query_range[:13] + "~]" if len(query_range) > 15 else query_range
229                )
230                # hit region
231                hit_start = getattr_str(hsp, "hit_start")
232                hit_end = getattr_str(hsp, "hit_end")
233                hit_range = "[%s:%s]" % (hit_start, hit_end)
234                hit_range = hit_range[:19] + "~]" if len(hit_range) > 21 else hit_range
235                # append the hsp row
236                lines.append(
237                    pattern % (idx, evalue, bitscore, aln_span, query_range, hit_range)
238                )
239
240        return "\n".join(lines)
241
242    def __getitem__(self, idx):
243        """Return the HSP object at the given index."""
244        # if key is slice, return a new Hit instance
245        if isinstance(idx, slice):
246            obj = self.__class__(self.hsps[idx])
247            self._transfer_attrs(obj)
248            return obj
249        return self._items[idx]
250
251    def __setitem__(self, idx, hsps):
252        """Assign hsps to index idx."""
253        # handle case if hsps is a list of hsp
254        if isinstance(hsps, (list, tuple)):
255            for hsp in hsps:
256                self._validate_hsp(hsp)
257        else:
258            self._validate_hsp(hsps)
259
260        self._items[idx] = hsps
261
262    def __delitem__(self, idx):
263        """Delete item of index idx."""
264        del self._items[idx]
265
266    # hsp properties #
267    def _validate_hsp(self, hsp):
268        """Validate an HSP object (PRIVATE).
269
270        Valid HSP objects have the same hit_id as the Hit object ID and the
271        same query_id as the Hit object's query_id.
272
273        """
274        if not isinstance(hsp, HSP):
275            raise TypeError("Hit objects can only contain HSP objects.")
276        # HACK: to make validation during __init__ work
277        if self._items:
278            if self.id is not None:
279                if hsp.hit_id != self.id:
280                    raise ValueError(
281                        "Expected HSP with hit ID %r, found %r instead."
282                        % (self.id, hsp.hit_id)
283                    )
284            else:
285                self.id = hsp.hit_id
286
287            if self.description is not None:
288                if hsp.hit_description != self.description:
289                    raise ValueError(
290                        "Expected HSP with hit description %r, found %r instead."
291                        % (self.description, hsp.hit_description)
292                    )
293            else:
294                self.description = hsp.hit_description
295
296            if self.query_id is not None:
297                if hsp.query_id != self.query_id:
298                    raise ValueError(
299                        "Expected HSP with query ID %r, found %r instead."
300                        % (self.query_id, hsp.query_id)
301                    )
302            else:
303                self.query_id = hsp.query_id
304
305            if self.query_description is not None:
306                if hsp.query_description != self.query_description:
307                    raise ValueError(
308                        "Expected HSP with query description %r, found %r instead."
309                        % (self.query_description, hsp.query_description)
310                    )
311            else:
312                self.query_description = hsp.query_description
313
314    # properties #
315    description = optionalcascade(
316        "_description", "hit_description", """Hit description"""
317    )
318    query_description = optionalcascade(
319        "_query_description",
320        "query_description",
321        """Description of the query that produced the hit""",
322    )
323    id = optionalcascade("_id", "hit_id", """Hit ID string.""")
324    query_id = optionalcascade(
325        "_query_id", "query_id", """ID string of the query that produced the hit"""
326    )
327    # returns all hsps
328    hsps = allitems(doc="""HSP objects contained in the Hit""")
329
330    @property
331    def id_all(self):
332        """Alternative ID(s) of the Hit."""
333        return [self.id] + self._id_alt
334
335    @property
336    def description_all(self):
337        """Alternative descriptions of the Hit."""
338        return [self.description] + self._description_alt
339
340    @property
341    def fragments(self):
342        """Access the HSPFragment objects contained in the Hit."""
343        return list(chain(*self._items))
344
345    # public methods #
346    def append(self, hsp):
347        """Add a HSP object to the end of Hit.
348
349        Parameters
350        hsp -- HSP object to append.
351
352        Any HSP object appended must have the same ``hit_id`` property as the
353        Hit object's ``id`` property and the same ``query_id`` property as the
354        Hit object's ``query_id`` property.
355
356        """
357        self._validate_hsp(hsp)
358        self._items.append(hsp)
359
360    def filter(self, func=None):
361        """Create new Hit object whose HSP objects pass the filter function.
362
363        :param func: function for filtering
364        :type func: callable, accepts HSP, returns bool
365
366        ``filter`` is analogous to Python's built-in ``filter`` function, except
367        that instead of returning a list it returns a ``Hit`` object. Here is an
368        example of using ``filter`` to select for HSPs having bitscores bigger
369        than 60::
370
371            >>> from Bio import SearchIO
372            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
373            >>> hit = qresult[3]
374            >>> evalue_filter = lambda hsp: hsp.bitscore > 60
375            >>> filtered_hit = hit.filter(evalue_filter)
376            >>> len(hit)
377            2
378            >>> len(filtered_hit)
379            1
380            >>> print(filtered_hit)
381            Query: 33211
382                   mir_1
383              Hit: gi|301171322|ref|NR_035857.1| (86)
384                   Pan troglodytes microRNA mir-520c (MIR520C), microRNA
385             HSPs: ----  --------  ---------  ------  ---------------  ---------------------
386                      #   E-value  Bit score    Span      Query range              Hit range
387                   ----  --------  ---------  ------  ---------------  ---------------------
388                      0   8.9e-20     100.47      60           [1:61]                [13:73]
389
390        """
391        hsps = list(filter(func, self.hsps))
392        if hsps:
393            obj = self.__class__(hsps)
394            self._transfer_attrs(obj)
395            return obj
396
397    def index(self, hsp):
398        """Return the index of a given HSP object, zero-based.
399
400        :param hsp: object to look up
401        :type hsp: HSP
402
403        """
404        return self._items.index(hsp)
405
406    def map(self, func=None):
407        """Create new Hit object, mapping the given function to its HSPs.
408
409        :param func: function for mapping
410        :type func: callable, accepts HSP, returns HSP
411
412        ``map`` is analogous to Python's built-in ``map`` function. It is applied to
413        all HSPs contained in the Hit object and returns a new Hit object.
414
415        """
416        if func is not None:
417            hsps = [func(x) for x in self.hsps[:]]  # this creates a shallow copy
418        else:
419            hsps = self.hsps[:]
420        if hsps:
421            obj = self.__class__(hsps)
422            self._transfer_attrs(obj)
423            return obj
424
425    def pop(self, index=-1):
426        """Remove and returns the HSP object at the specified index.
427
428        :param index: index of HSP object to pop
429        :type index: int
430
431        """
432        return self._items.pop(index)
433
434    def sort(self, key=None, reverse=False, in_place=True):
435        """Sort the HSP objects.
436
437        :param key: sorting function
438        :type key: callable, accepts HSP, returns key for sorting
439        :param reverse: whether to reverse sorting results or no
440        :type reverse: bool
441        :param in_place: whether to do in-place sorting or no
442        :type in_place: bool
443
444        ``sort`` defaults to sorting in-place, to mimick Python's ``list.sort``
445        method. If you set the ``in_place`` argument to False, it will treat
446        return a new, sorted Hit object and keep the initial one unsorted
447
448        """
449        if in_place:
450            self._items.sort(key=key, reverse=reverse)
451        else:
452            hsps = self.hsps[:]
453            hsps.sort(key=key, reverse=reverse)
454            obj = self.__class__(hsps)
455            self._transfer_attrs(obj)
456            return obj
457
458
# if not used as a module, run the doctest
# The guard is true only when this file is executed directly as a script;
# importing the module skips the block. ``run_doctest`` is imported here rather
# than at the top of the file, so the import happens only in script mode.
# NOTE(review): presumably it runs the doctest examples embedded in the
# docstrings above -- confirm against Bio._utils.
if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest()
464